PROJECT
In [40]:
# get information of the datatypes
# if there is any class data in the features,convert them into binary form
# look how ages of the data are looking and some information
# get the descriptive statistics of the above data
# construct the histogram, boxplot for every column using for loop
# get the corelation heatmap and find if there is association between certain columns
# choose the specific columns which have certain association and get the pairplot
In [41]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings("ignore")
In [42]:
r=pd.read_csv("C:\\Users\\ruchi\\Downloads\\Minsk2020_ALS_dataset.csv")
In [43]:
r
Out[43]:
| ID | Sex | Age | J1_a | J3_a | J5_a | J55_a | S1_a | S3_a | S5_a | ... | dCCi(7) | dCCi(8) | dCCi(9) | dCCi(10) | dCCi(11) | dCCi(12) | d_1 | F2_i | F2_{conv} | Diagnosis (ALS) | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 8 | M | 58 | 0.321817 | 0.141230 | 0.199128 | 0.923634 | 6.044559 | 3.196477 | 3.770575 | ... | -0.024467 | -0.005300 | 0.051874 | -0.037710 | -0.026549 | -0.021149 | 4.825476 | 2526.285657 | 833.498083 | 1 |
| 1 | 20 | F | 57 | 0.344026 | 0.177032 | 0.206458 | 0.827714 | 1.967728 | 0.856639 | 1.179851 | ... | 0.002485 | -0.004535 | -0.000225 | -0.006977 | -0.012510 | 0.014773 | 5.729322 | 1985.712014 | 561.802625 | 1 |
| 2 | 21 | F | 58 | 0.264740 | 0.148228 | 0.177078 | 0.532566 | 1.850893 | 0.942743 | 1.071950 | ... | -0.013927 | 0.007908 | 0.007960 | -0.009022 | -0.012488 | -0.015588 | 8.258488 | 2364.695972 | 796.723440 | 1 |
| 3 | 22 | F | 70 | 0.455793 | 0.174870 | 0.243660 | 0.962641 | 2.883768 | 1.284926 | 1.915058 | ... | -0.019285 | -0.021768 | 0.020495 | 0.035976 | -0.034648 | 0.008021 | 5.447137 | 1860.172768 | 359.409974 | 1 |
| 4 | 24 | M | 66 | 0.269335 | 0.143961 | 0.167465 | 0.547745 | 2.327924 | 1.164109 | 1.420891 | ... | -0.005743 | 0.004726 | -0.015247 | 0.003900 | -0.007686 | -0.003784 | 8.562517 | 2051.627447 | 817.111847 | 1 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 59 | 123 | M | 43 | 0.255799 | 0.123679 | 0.182658 | 0.505591 | 6.222031 | 2.876602 | 3.894294 | ... | 0.220533 | 0.089766 | -0.120838 | -0.004221 | -0.013165 | 0.004642 | 9.855665 | 3128.341308 | 1990.937097 | 0 |
| 60 | 125 | M | 63 | 0.513175 | 0.296489 | 0.334845 | 0.729804 | 9.686563 | 4.327943 | 5.687977 | ... | 0.028016 | -0.038739 | 0.011588 | -0.011281 | -0.004294 | 0.011239 | 11.094558 | 1964.218942 | 601.076046 | 0 |
| 61 | 127 | F | 67 | 0.383901 | 0.245923 | 0.251359 | 0.415136 | 4.148414 | 2.069757 | 2.527213 | ... | 0.011685 | 0.007883 | -0.014839 | 0.013859 | 0.011145 | 0.001418 | 12.564742 | 2526.285657 | 934.343638 | 0 |
| 62 | 129 | F | 68 | 1.336216 | 0.815757 | 0.733197 | 0.981928 | 11.224542 | 5.295879 | 6.994751 | ... | 0.015712 | 0.013437 | 0.025113 | 0.008852 | -0.010132 | -0.008458 | 10.670669 | 3201.250289 | 2284.051658 | 0 |
| 63 | 131 | F | 60 | 0.916706 | 0.566121 | 0.512857 | 1.467165 | 6.372832 | 3.251168 | 3.539229 | ... | -0.046235 | 0.041946 | -0.065313 | -0.016682 | 0.061026 | -0.005883 | 6.972152 | 2792.655884 | 1518.529172 | 0 |
64 rows × 135 columns
In [44]:
r=pd.DataFrame(r)
In [45]:
r.head() ##first five rows
Out[45]:
| ID | Sex | Age | J1_a | J3_a | J5_a | J55_a | S1_a | S3_a | S5_a | ... | dCCi(7) | dCCi(8) | dCCi(9) | dCCi(10) | dCCi(11) | dCCi(12) | d_1 | F2_i | F2_{conv} | Diagnosis (ALS) | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 8 | M | 58 | 0.321817 | 0.141230 | 0.199128 | 0.923634 | 6.044559 | 3.196477 | 3.770575 | ... | -0.024467 | -0.005300 | 0.051874 | -0.037710 | -0.026549 | -0.021149 | 4.825476 | 2526.285657 | 833.498083 | 1 |
| 1 | 20 | F | 57 | 0.344026 | 0.177032 | 0.206458 | 0.827714 | 1.967728 | 0.856639 | 1.179851 | ... | 0.002485 | -0.004535 | -0.000225 | -0.006977 | -0.012510 | 0.014773 | 5.729322 | 1985.712014 | 561.802625 | 1 |
| 2 | 21 | F | 58 | 0.264740 | 0.148228 | 0.177078 | 0.532566 | 1.850893 | 0.942743 | 1.071950 | ... | -0.013927 | 0.007908 | 0.007960 | -0.009022 | -0.012488 | -0.015588 | 8.258488 | 2364.695972 | 796.723440 | 1 |
| 3 | 22 | F | 70 | 0.455793 | 0.174870 | 0.243660 | 0.962641 | 2.883768 | 1.284926 | 1.915058 | ... | -0.019285 | -0.021768 | 0.020495 | 0.035976 | -0.034648 | 0.008021 | 5.447137 | 1860.172768 | 359.409974 | 1 |
| 4 | 24 | M | 66 | 0.269335 | 0.143961 | 0.167465 | 0.547745 | 2.327924 | 1.164109 | 1.420891 | ... | -0.005743 | 0.004726 | -0.015247 | 0.003900 | -0.007686 | -0.003784 | 8.562517 | 2051.627447 | 817.111847 | 1 |
5 rows × 135 columns
In [46]:
r.tail() ##last five rows
Out[46]:
| ID | Sex | Age | J1_a | J3_a | J5_a | J55_a | S1_a | S3_a | S5_a | ... | dCCi(7) | dCCi(8) | dCCi(9) | dCCi(10) | dCCi(11) | dCCi(12) | d_1 | F2_i | F2_{conv} | Diagnosis (ALS) | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 59 | 123 | M | 43 | 0.255799 | 0.123679 | 0.182658 | 0.505591 | 6.222031 | 2.876602 | 3.894294 | ... | 0.220533 | 0.089766 | -0.120838 | -0.004221 | -0.013165 | 0.004642 | 9.855665 | 3128.341308 | 1990.937097 | 0 |
| 60 | 125 | M | 63 | 0.513175 | 0.296489 | 0.334845 | 0.729804 | 9.686563 | 4.327943 | 5.687977 | ... | 0.028016 | -0.038739 | 0.011588 | -0.011281 | -0.004294 | 0.011239 | 11.094558 | 1964.218942 | 601.076046 | 0 |
| 61 | 127 | F | 67 | 0.383901 | 0.245923 | 0.251359 | 0.415136 | 4.148414 | 2.069757 | 2.527213 | ... | 0.011685 | 0.007883 | -0.014839 | 0.013859 | 0.011145 | 0.001418 | 12.564742 | 2526.285657 | 934.343638 | 0 |
| 62 | 129 | F | 68 | 1.336216 | 0.815757 | 0.733197 | 0.981928 | 11.224542 | 5.295879 | 6.994751 | ... | 0.015712 | 0.013437 | 0.025113 | 0.008852 | -0.010132 | -0.008458 | 10.670669 | 3201.250289 | 2284.051658 | 0 |
| 63 | 131 | F | 60 | 0.916706 | 0.566121 | 0.512857 | 1.467165 | 6.372832 | 3.251168 | 3.539229 | ... | -0.046235 | 0.041946 | -0.065313 | -0.016682 | 0.061026 | -0.005883 | 6.972152 | 2792.655884 | 1518.529172 | 0 |
5 rows × 135 columns
In [47]:
r.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 64 entries, 0 to 63 Columns: 135 entries, ID to Diagnosis (ALS) dtypes: float64(131), int64(3), object(1) memory usage: 67.6+ KB
In [48]:
r.shape
Out[48]:
(64, 135)
In [49]:
r.isnull().sum().sort_values(ascending=False) ##if null values are there are not
Out[49]:
ID 0
Hi(3)_{mu} 0
Hi(1)_{rel} 0
Hi(8)_{sd} 0
Hi(7)_{sd} 0
..
Ha(5)_{rel} 0
Ha(4)_{rel} 0
Ha(3)_{rel} 0
Ha(2)_{rel} 0
Diagnosis (ALS) 0
Length: 135, dtype: int64
In [50]:
r.columns
Out[50]:
Index(['ID', 'Sex', 'Age', 'J1_a', 'J3_a', 'J5_a', 'J55_a', 'S1_a', 'S3_a',
'S5_a',
...
'dCCi(7)', 'dCCi(8)', 'dCCi(9)', 'dCCi(10)', 'dCCi(11)', 'dCCi(12)',
'd_1', 'F2_i', 'F2_{conv}', 'Diagnosis (ALS)'],
dtype='object', length=135)
In [51]:
for i in r.columns: ##see the all columns names eith data types
print(f"'{i}' data type:{r[i].dtypes}")
'ID' data type:int64
'Sex' data type:object
'Age' data type:int64
'J1_a' data type:float64
'J3_a' data type:float64
'J5_a' data type:float64
'J55_a' data type:float64
'S1_a' data type:float64
'S3_a' data type:float64
'S5_a' data type:float64
'S11_a' data type:float64
'S55_a' data type:float64
'DPF_a' data type:float64
'PFR_a' data type:float64
'PPE_a' data type:float64
'PVI_a' data type:float64
'HNR_a' data type:float64
'GNEa_{\mu}' data type:float64
'GNEa_{\sigma}' data type:float64
'Ha(1)_{mu}' data type:float64
'Ha(2)_{mu}' data type:float64
'Ha(3)_{mu}' data type:float64
'Ha(4)_{mu}' data type:float64
'Ha(5)_{mu}' data type:float64
'Ha(6)_{mu}' data type:float64
'Ha(7)_{mu}' data type:float64
'Ha(8)_{mu}' data type:float64
'Ha(1)_{sd}' data type:float64
'Ha(2)_{sd}' data type:float64
'Ha(3)_{sd}' data type:float64
'Ha(4)_{sd}' data type:float64
'Ha(5)_{sd}' data type:float64
'Ha(6)_{sd}' data type:float64
'Ha(7)_{sd}' data type:float64
'Ha(8)_{sd}' data type:float64
'Ha(1)_{rel}' data type:float64
'Ha(2)_{rel}' data type:float64
'Ha(3)_{rel}' data type:float64
'Ha(4)_{rel}' data type:float64
'Ha(5)_{rel}' data type:float64
'Ha(6)_{rel}' data type:float64
'Ha(7)_{rel}' data type:float64
'Ha(8)_{rel}' data type:float64
'CCa(1)' data type:float64
'CCa(2)' data type:float64
'CCa(3)' data type:float64
'CCa(4)' data type:float64
'CCa(5)' data type:float64
'CCa(6)' data type:float64
'CCa(7)' data type:float64
'CCa(8)' data type:float64
'CCa(9)' data type:float64
'CCa(10)' data type:float64
'CCa(11)' data type:float64
'CCa(12)' data type:float64
'dCCa(1)' data type:float64
'dCCa(2)' data type:float64
'dCCa(3)' data type:float64
'dCCa(4)' data type:float64
'dCCa(5)' data type:float64
'dCCa(6)' data type:float64
'dCCa(7)' data type:float64
'dCCa(8)' data type:float64
'dCCa(9)' data type:float64
'dCCa(10)' data type:float64
'dCCa(11)' data type:float64
'dCCa(12)' data type:float64
'J1_i' data type:float64
'J3_i' data type:float64
'J5_i' data type:float64
'J55_i' data type:float64
'S1_i' data type:float64
'S3_i' data type:float64
'S5_i' data type:float64
'S11_i' data type:float64
'S55_i' data type:float64
'DPF_i' data type:float64
'PFR_i' data type:float64
'PPE_i' data type:float64
'PVI_i' data type:float64
'HNR_i' data type:float64
'GNEi_{\mu}' data type:float64
'GNEi_{\sigma}' data type:float64
'Hi(1)_{mu}' data type:float64
'Hi(2)_{mu}' data type:float64
'Hi(3)_{mu}' data type:float64
'Hi(4)_{mu}' data type:float64
'Hi(5)_{mu}' data type:float64
'Hi(6)_{mu}' data type:float64
'Hi(7)_{mu}' data type:float64
'Hi(8)_{mu}' data type:float64
'Hi(1)_{sd}' data type:float64
'Hi(2)_{sd}' data type:float64
'Hi(3)_{sd}' data type:float64
'Hi(4)_{sd}' data type:float64
'Hi(5)_{sd}' data type:float64
'Hi(6)_{sd}' data type:float64
'Hi(7)_{sd}' data type:float64
'Hi(8)_{sd}' data type:float64
'Hi(1)_{rel}' data type:float64
'Hi(2)_{rel}' data type:float64
'Hi(3)_{rel}' data type:float64
'Hi(4)_{rel}' data type:float64
'Hi(5)_{rel}' data type:float64
'Hi(6)_{rel}' data type:float64
'Hi(7)_{rel}' data type:float64
'Hi(8)_{rel}' data type:float64
'CCi(1)' data type:float64
'CCi(2)' data type:float64
'CCi(3)' data type:float64
'CCi(4)' data type:float64
'CCi(5)' data type:float64
'CCi(6)' data type:float64
'CCi(7)' data type:float64
'CCi(8)' data type:float64
'CCi(9)' data type:float64
'CCi(10)' data type:float64
'CCi(11)' data type:float64
'CCi(12)' data type:float64
'dCCi(1)' data type:float64
'dCCi(2)' data type:float64
'dCCi(3)' data type:float64
'dCCi(4)' data type:float64
'dCCi(5)' data type:float64
'dCCi(6)' data type:float64
'dCCi(7)' data type:float64
'dCCi(8)' data type:float64
'dCCi(9)' data type:float64
'dCCi(10)' data type:float64
'dCCi(11)' data type:float64
'dCCi(12)' data type:float64
'd_1' data type:float64
'F2_i' data type:float64
'F2_{conv}' data type:float64
'Diagnosis (ALS)' data type:int64
In [13]:
r["Sex"].value_counts() ##showing the count of gender
Out[13]:
Sex F 34 M 30 Name: count, dtype: int64
In [14]:
print(r.dtypes) ##data type
ID int64
Sex object
Age int64
J1_a float64
J3_a float64
...
dCCi(12) float64
d_1 float64
F2_i float64
F2_{conv} float64
Diagnosis (ALS) int64
Length: 135, dtype: object
In [15]:
x=r.iloc[:,:-1] ##spliting it into training and testing data
y=r.iloc[:,-1]
print("training data:",x)
print(" "*3)
print("testing data:",y)
training data: ID Sex Age J1_a J3_a J5_a J55_a S1_a S3_a \
0 8 M 58 0.321817 0.141230 0.199128 0.923634 6.044559 3.196477
1 20 F 57 0.344026 0.177032 0.206458 0.827714 1.967728 0.856639
2 21 F 58 0.264740 0.148228 0.177078 0.532566 1.850893 0.942743
3 22 F 70 0.455793 0.174870 0.243660 0.962641 2.883768 1.284926
4 24 M 66 0.269335 0.143961 0.167465 0.547745 2.327924 1.164109
.. ... .. ... ... ... ... ... ... ...
59 123 M 43 0.255799 0.123679 0.182658 0.505591 6.222031 2.876602
60 125 M 63 0.513175 0.296489 0.334845 0.729804 9.686563 4.327943
61 127 F 67 0.383901 0.245923 0.251359 0.415136 4.148414 2.069757
62 129 F 68 1.336216 0.815757 0.733197 0.981928 11.224542 5.295879
63 131 F 60 0.916706 0.566121 0.512857 1.467165 6.372832 3.251168
S5_a ... dCCi(6) dCCi(7) dCCi(8) dCCi(9) dCCi(10) dCCi(11) \
0 3.770575 ... 0.016809 -0.024467 -0.005300 0.051874 -0.037710 -0.026549
1 1.179851 ... 0.019235 0.002485 -0.004535 -0.000225 -0.006977 -0.012510
2 1.071950 ... 0.007199 -0.013927 0.007908 0.007960 -0.009022 -0.012488
3 1.915058 ... 0.013213 -0.019285 -0.021768 0.020495 0.035976 -0.034648
4 1.420891 ... 0.002948 -0.005743 0.004726 -0.015247 0.003900 -0.007686
.. ... ... ... ... ... ... ... ...
59 3.894294 ... -0.060395 0.220533 0.089766 -0.120838 -0.004221 -0.013165
60 5.687977 ... -0.046223 0.028016 -0.038739 0.011588 -0.011281 -0.004294
61 2.527213 ... -0.005008 0.011685 0.007883 -0.014839 0.013859 0.011145
62 6.994751 ... 0.025679 0.015712 0.013437 0.025113 0.008852 -0.010132
63 3.539229 ... -0.072828 -0.046235 0.041946 -0.065313 -0.016682 0.061026
dCCi(12) d_1 F2_i F2_{conv}
0 -0.021149 4.825476 2526.285657 833.498083
1 0.014773 5.729322 1985.712014 561.802625
2 -0.015588 8.258488 2364.695972 796.723440
3 0.008021 5.447137 1860.172768 359.409974
4 -0.003784 8.562517 2051.627447 817.111847
.. ... ... ... ...
59 0.004642 9.855665 3128.341308 1990.937097
60 0.011239 11.094558 1964.218942 601.076046
61 0.001418 12.564742 2526.285657 934.343638
62 -0.008458 10.670669 3201.250289 2284.051658
63 -0.005883 6.972152 2792.655884 1518.529172
[64 rows x 134 columns]
testing data: 0 1
1 1
2 1
3 1
4 1
..
59 0
60 0
61 0
62 0
63 0
Name: Diagnosis (ALS), Length: 64, dtype: int64
In [16]:
from sklearn.model_selection import train_test_split
In [17]:
x_train,x_test,y_train,y_test=train_test_split(x,y,random_state=1,test_size=0.15,stratify=y)
In [18]:
x_train
Out[18]:
| ID | Sex | Age | J1_a | J3_a | J5_a | J55_a | S1_a | S3_a | S5_a | ... | dCCi(6) | dCCi(7) | dCCi(8) | dCCi(9) | dCCi(10) | dCCi(11) | dCCi(12) | d_1 | F2_i | F2_{conv} | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 14 | 52 | F | 62 | 1.394205 | 0.839935 | 0.703224 | 1.362995 | 4.661601 | 2.402909 | 2.826132 | ... | -0.000823 | 0.025207 | -0.002524 | -0.006103 | -0.012850 | -0.015023 | 0.019711 | 12.862700 | 3092.653846 | 1552.852150 |
| 10 | 39 | M | 67 | 1.232990 | 0.561293 | 0.784691 | 1.889785 | 4.139457 | 1.915796 | 2.692091 | ... | 0.031624 | -0.018089 | 0.018493 | 0.006547 | -0.004817 | -0.003710 | -0.000692 | 2.276702 | 1686.160000 | 669.461749 |
| 59 | 123 | M | 43 | 0.255799 | 0.123679 | 0.182658 | 0.505591 | 6.222031 | 2.876602 | 3.894294 | ... | -0.060395 | 0.220533 | 0.089766 | -0.120838 | -0.004221 | -0.013165 | 0.004642 | 9.855665 | 3128.341308 | 1990.937097 |
| 35 | 24 | M | 60 | 0.391222 | 0.200687 | 0.232286 | 0.638498 | 2.324343 | 1.181015 | 1.475889 | ... | -0.005405 | -0.006063 | 0.028443 | -0.006618 | 0.003011 | -0.005547 | -0.002806 | 10.525555 | 2700.419449 | 1393.549002 |
| 44 | 77 | F | 40 | 0.349185 | 0.213207 | 0.227957 | 0.347673 | 3.395795 | 1.826527 | 2.123809 | ... | -0.042489 | -0.021018 | 0.049357 | 0.005870 | -0.027345 | -0.026138 | 0.009430 | 10.594026 | 3515.050257 | 1925.062482 |
| 26 | 94 | F | 55 | 0.764443 | 0.423008 | 0.472764 | 1.699466 | 8.254286 | 3.966819 | 5.010081 | ... | 0.018794 | 0.036827 | 0.029426 | -0.024056 | -0.065735 | 0.012986 | 0.012941 | 9.482607 | 2471.097222 | 1019.521207 |
| 20 | 72 | F | 64 | 0.797801 | 0.470116 | 0.522910 | 1.663179 | 7.778152 | 3.640445 | 4.355894 | ... | 0.028207 | -0.052002 | -0.038210 | 0.028143 | 0.033156 | 0.010924 | 0.020823 | 5.924309 | 1880.538263 | 482.819916 |
| 27 | 96 | F | 52 | 0.475047 | 0.308491 | 0.266091 | 0.396462 | 3.686641 | 1.999494 | 2.083195 | ... | 0.009989 | 0.000009 | 0.042347 | -0.003170 | 0.002103 | 0.011023 | -0.031324 | 11.597077 | 2792.655884 | 1457.933269 |
| 3 | 22 | F | 70 | 0.455793 | 0.174870 | 0.243660 | 0.962641 | 2.883768 | 1.284926 | 1.915058 | ... | 0.013213 | -0.019285 | -0.021768 | 0.020495 | 0.035976 | -0.034648 | 0.008021 | 5.447137 | 1860.172768 | 359.409974 |
| 23 | 80 | F | 63 | 0.504802 | 0.253832 | 0.313823 | 1.229761 | 6.571067 | 2.833840 | 4.277576 | ... | 0.013513 | -0.003382 | 0.027770 | -0.005112 | -0.027102 | 0.012612 | -0.010165 | 7.339732 | 3556.978755 | 2129.076098 |
| 4 | 24 | M | 66 | 0.269335 | 0.143961 | 0.167465 | 0.547745 | 2.327924 | 1.164109 | 1.420891 | ... | 0.002948 | -0.005743 | 0.004726 | -0.015247 | 0.003900 | -0.007686 | -0.003784 | 8.562517 | 2051.627447 | 817.111847 |
| 53 | 111 | F | 60 | 0.121065 | 0.079473 | 0.101627 | 0.285497 | 1.117740 | 0.558948 | 0.615284 | ... | -0.005255 | 0.017626 | -0.007311 | -0.006541 | -0.012625 | 0.013987 | 0.014222 | 11.261019 | 3201.250289 | 1960.299963 |
| 34 | 16 | M | 38 | 0.415366 | 0.254119 | 0.243906 | 0.449362 | 2.394697 | 1.292778 | 1.468358 | ... | 0.000016 | 0.008047 | -0.005823 | -0.006588 | -0.000030 | 0.003137 | -0.002440 | 8.531677 | 3515.050257 | 2441.219054 |
| 41 | 61 | F | 37 | 0.818954 | 0.484998 | 0.511515 | 0.721296 | 4.942091 | 2.724820 | 3.320166 | ... | 0.005945 | 0.021267 | -0.008438 | 0.007339 | -0.004838 | -0.004133 | 0.000937 | 10.151495 | 2444.009071 | 1157.993834 |
| 57 | 119 | F | 65 | 0.356684 | 0.233194 | 0.242305 | 0.410246 | 3.251335 | 1.867689 | 1.908461 | ... | 0.005494 | -0.003761 | 0.006982 | 0.012774 | 0.008384 | 0.002823 | -0.014323 | 10.889792 | 2471.097222 | 1549.074209 |
| 62 | 129 | F | 68 | 1.336216 | 0.815757 | 0.733197 | 0.981928 | 11.224542 | 5.295879 | 6.994751 | ... | 0.025679 | 0.015712 | 0.013437 | 0.025113 | 0.008852 | -0.010132 | -0.008458 | 10.670669 | 3201.250289 | 2284.051658 |
| 51 | 107 | M | 80 | 0.532330 | 0.296012 | 0.322217 | 0.650045 | 8.042171 | 4.754839 | 5.060159 | ... | -0.014078 | 0.010696 | -0.003448 | -0.015049 | 0.010195 | -0.002152 | 0.001881 | 9.472404 | 2238.670803 | 1241.852695 |
| 37 | 28 | M | 35 | 0.758571 | 0.464961 | 0.413086 | 0.605323 | 9.271523 | 5.707370 | 5.150374 | ... | -0.003686 | 0.018992 | 0.034957 | 0.016890 | -0.009376 | -0.008998 | -0.007465 | 12.892692 | 2888.617021 | 1653.804580 |
| 13 | 48 | F | 63 | 0.805433 | 0.335434 | 0.487871 | 1.813700 | 4.485662 | 2.192828 | 2.864096 | ... | 0.005770 | -0.028306 | -0.035823 | -0.004287 | -0.014985 | -0.004156 | -0.005061 | 7.029500 | 2730.764545 | 1288.920905 |
| 7 | 28 | M | 58 | 1.210548 | 0.726523 | 0.661670 | 1.205596 | 8.492104 | 4.109625 | 5.377768 | ... | 0.007534 | 0.017088 | 0.015907 | -0.006379 | 0.057303 | -0.001919 | -0.008007 | 8.422353 | 1964.218942 | 759.068477 |
| 33 | 6 | M | 41 | 1.063272 | 0.683918 | 0.504427 | 0.613402 | 3.176717 | 1.870164 | 1.590798 | ... | 0.017293 | -0.008880 | -0.001511 | 0.003965 | 0.015240 | -0.006500 | -0.008723 | 10.482453 | 2263.284796 | 801.333727 |
| 60 | 125 | M | 63 | 0.513175 | 0.296489 | 0.334845 | 0.729804 | 9.686563 | 4.327943 | 5.687977 | ... | -0.046223 | 0.028016 | -0.038739 | 0.011588 | -0.011281 | -0.004294 | 0.011239 | 11.094558 | 1964.218942 | 601.076046 |
| 50 | 99 | F | 57 | 0.098881 | 0.065791 | 0.092655 | 0.363699 | 0.883453 | 0.449099 | 0.497111 | ... | 0.015152 | -0.027019 | 0.037807 | -0.019812 | 0.000436 | -0.002840 | -0.013161 | 10.108459 | 2143.018556 | 805.694015 |
| 15 | 55 | M | 61 | 1.177795 | 0.730069 | 0.569287 | 1.542224 | 5.883227 | 3.384535 | 2.875918 | ... | 0.028733 | -0.004708 | -0.004383 | 0.006398 | -0.014412 | -0.005085 | 0.005530 | 7.148809 | 2051.627447 | 784.563460 |
| 9 | 32 | M | 61 | 0.387730 | 0.213745 | 0.249993 | 0.591160 | 3.351240 | 1.874979 | 2.075762 | ... | 0.004241 | 0.006956 | -0.002013 | -0.000082 | 0.008275 | -0.013829 | -0.006955 | 7.572111 | 1780.825796 | 838.978523 |
| 32 | 4 | F | 53 | 0.598550 | 0.350577 | 0.378646 | 0.654399 | 4.224992 | 2.312947 | 2.678596 | ... | 0.001768 | -0.003924 | 0.001995 | 0.004019 | -0.007207 | 0.004614 | 0.003154 | 7.615608 | 2921.471038 | 1230.982918 |
| 45 | 81 | F | 60 | 0.286517 | 0.176603 | 0.195712 | 0.692300 | 1.847736 | 0.869278 | 1.099434 | ... | 0.023767 | -0.020693 | 0.019078 | -0.034556 | -0.006731 | -0.025442 | -0.005668 | 12.874560 | 2670.464441 | 1309.871125 |
| 52 | 109 | F | 59 | 0.326851 | 0.215126 | 0.182667 | 0.304952 | 2.801295 | 1.669496 | 1.509331 | ... | -0.000688 | -0.018500 | 0.030142 | 0.002111 | 0.006603 | -0.013728 | -0.021879 | 8.064921 | 2700.419449 | 1228.586973 |
| 18 | 64 | M | 57 | 0.426554 | 0.202661 | 0.255198 | 0.885479 | 4.387137 | 1.892587 | 2.628737 | ... | 0.003446 | 0.010420 | 0.005288 | -0.006713 | 0.013147 | -0.017816 | 0.017087 | 11.136041 | 2238.670803 | 930.223353 |
| 5 | 25 | M | 51 | 0.339593 | 0.182070 | 0.204186 | 0.505987 | 1.969217 | 0.834783 | 1.208688 | ... | 0.009548 | -0.005489 | 0.010164 | -0.008341 | -0.006132 | 0.005441 | 0.003568 | 9.810520 | 2143.018556 | 1004.727725 |
| 29 | 100 | M | 69 | 0.511742 | 0.313666 | 0.320677 | 0.431433 | 7.417198 | 4.258109 | 4.961685 | ... | -0.013859 | 0.000308 | -0.038984 | 0.048771 | -0.014493 | 0.008647 | -0.004524 | 2.512995 | 1233.583584 | 48.246203 |
| 24 | 84 | F | 55 | 0.419330 | 0.255329 | 0.338319 | 0.486463 | 4.366452 | 1.932596 | 2.855240 | ... | -0.005714 | 0.012337 | -0.005475 | -0.001179 | -0.002687 | 0.005369 | 0.034289 | 11.766102 | 2888.617021 | 1839.961952 |
| 19 | 68 | M | 40 | 0.496922 | 0.213975 | 0.293201 | 0.957065 | 3.293700 | 1.672811 | 1.894214 | ... | 0.023989 | -0.030137 | 0.024769 | 0.000567 | 0.005071 | -0.023693 | 0.009770 | 7.872279 | 2143.018556 | 985.160918 |
| 30 | 102 | F | 53 | 0.561542 | 0.331788 | 0.345130 | 1.020709 | 6.074875 | 2.798090 | 3.526055 | ... | 0.022664 | 0.008537 | -0.000306 | -0.012570 | -0.048113 | 0.009073 | -0.005201 | 12.036001 | 2526.285657 | 1200.269866 |
| 54 | 113 | F | 62 | 0.823780 | 0.503064 | 0.497916 | 0.704065 | 6.861939 | 3.993216 | 4.174705 | ... | 0.014494 | 0.003410 | 0.000716 | -0.010791 | 0.004878 | -0.005527 | -0.001659 | 7.388631 | 2096.808356 | 737.085571 |
| 6 | 27 | M | 57 | 0.691093 | 0.406901 | 0.406287 | 0.765986 | 6.168256 | 3.702088 | 3.286232 | ... | 0.002661 | -0.012605 | 0.013385 | 0.004513 | 0.001568 | -0.008244 | 0.005801 | 5.945219 | 2313.388825 | 1219.744513 |
| 48 | 89 | F | 45 | 0.155762 | 0.091831 | 0.117099 | 0.575170 | 1.575403 | 0.738640 | 0.834830 | ... | 0.002348 | -0.002313 | 0.008766 | 0.006330 | 0.016645 | -0.003417 | 0.002115 | 10.713432 | 3599.554394 | 2226.127951 |
| 2 | 21 | F | 58 | 0.264740 | 0.148228 | 0.177078 | 0.532566 | 1.850893 | 0.942743 | 1.071950 | ... | 0.007199 | -0.013927 | 0.007908 | 0.007960 | -0.009022 | -0.012488 | -0.015588 | 8.258488 | 2364.695972 | 796.723440 |
| 12 | 46 | F | 50 | 0.199868 | 0.132266 | 0.157546 | 0.454599 | 1.467287 | 0.706004 | 0.819799 | ... | 0.001819 | 0.011291 | -0.001916 | 0.001513 | -0.002502 | 0.002014 | 0.010446 | 10.950821 | 3164.536485 | 1553.425003 |
| 55 | 115 | F | 50 | 0.462076 | 0.269359 | 0.296952 | 0.909990 | 5.020638 | 2.305690 | 2.837745 | ... | 0.001699 | -0.001674 | 0.012043 | -0.003015 | 0.022928 | -0.005444 | -0.006394 | 14.651111 | 3515.050257 | 2210.936432 |
| 22 | 78 | F | 64 | 0.437128 | 0.265519 | 0.349086 | 0.493971 | 3.205305 | 1.515726 | 2.119909 | ... | -0.060397 | 0.056134 | -0.069202 | -0.040180 | -0.058987 | 0.044552 | 0.077897 | 2.986929 | 1800.351911 | 784.205580 |
| 49 | 97 | F | 39 | 0.463874 | 0.290374 | 0.278926 | 0.550290 | 2.449028 | 1.372602 | 1.412486 | ... | -0.000234 | 0.011520 | 0.009453 | 0.002072 | -0.002811 | 0.004577 | -0.009058 | 10.616077 | 3164.536485 | 2037.766311 |
| 28 | 98 | M | 68 | 1.076446 | 0.624824 | 0.561100 | 1.965493 | 7.001105 | 3.785627 | 4.033533 | ... | -0.011602 | 0.018274 | 0.004485 | 0.005101 | -0.007367 | 0.007041 | -0.001560 | 11.097431 | 3515.050257 | 2157.871393 |
| 46 | 85 | F | 55 | 0.454844 | 0.289458 | 0.293609 | 0.582757 | 2.124786 | 1.170689 | 1.182154 | ... | 0.006111 | 0.003214 | 0.007520 | -0.024862 | 0.016600 | -0.015827 | 0.002088 | 12.189059 | 3092.653846 | 1551.286187 |
| 16 | 58 | M | 58 | 5.391649 | 3.217293 | 3.321567 | 5.991336 | 29.441589 | 16.791944 | 18.368778 | ... | -0.002691 | 0.008994 | 0.025390 | 0.040231 | 0.003503 | -0.006546 | -0.024835 | 8.008742 | 1921.927690 | 583.380671 |
| 63 | 131 | F | 60 | 0.916706 | 0.566121 | 0.512857 | 1.467165 | 6.372832 | 3.251168 | 3.539229 | ... | -0.072828 | -0.046235 | 0.041946 | -0.065313 | -0.016682 | 0.061026 | -0.005883 | 6.972152 | 2792.655884 | 1518.529172 |
| 21 | 76 | M | 68 | 0.379367 | 0.223560 | 0.308426 | 1.821009 | 5.539475 | 2.551095 | 3.000675 | ... | 0.008846 | -0.006281 | -0.004169 | -0.008164 | 0.012667 | -0.015442 | 0.001157 | 5.218871 | 1649.621788 | 177.843734 |
| 58 | 121 | F | 67 | 0.237654 | 0.154117 | 0.154312 | 0.544602 | 3.583597 | 1.690501 | 1.840416 | ... | -0.027062 | -0.002917 | 0.020006 | -0.064556 | 0.014446 | 0.015248 | -0.013882 | 9.311776 | 2670.464441 | 1087.940178 |
| 61 | 127 | F | 67 | 0.383901 | 0.245923 | 0.251359 | 0.415136 | 4.148414 | 2.069757 | 2.527213 | ... | -0.005008 | 0.011685 | 0.007883 | -0.014839 | 0.013859 | 0.011145 | 0.001418 | 12.564742 | 2526.285657 | 934.343638 |
| 38 | 42 | M | 60 | 0.182721 | 0.089174 | 0.113216 | 0.651748 | 1.839764 | 0.673977 | 1.084464 | ... | 0.021454 | 0.007635 | 0.019134 | 0.013320 | 0.002002 | -0.022146 | 0.005825 | 15.420777 | 2313.388825 | 1381.628235 |
| 11 | 42 | M | 67 | 1.608454 | 0.989100 | 0.894706 | 1.304613 | 6.031953 | 3.279233 | 3.729620 | ... | 0.019701 | 0.004293 | 0.026891 | 0.080337 | -0.007352 | 0.055993 | 0.075071 | 10.674106 | 2007.441819 | 481.009629 |
| 47 | 86 | F | 63 | 0.753234 | 0.467912 | 0.442792 | 0.540327 | 4.801768 | 2.677700 | 3.241265 | ... | -0.018937 | 0.011239 | 0.011488 | 0.015334 | 0.008337 | 0.005474 | 0.011687 | 10.515820 | 3057.463491 | 1494.054076 |
| 31 | 2 | F | 64 | 0.219429 | 0.144385 | 0.171661 | 0.555528 | 2.054277 | 1.108746 | 1.260472 | ... | -0.011558 | -0.004168 | 0.018603 | 0.019350 | 0.016342 | 0.014127 | 0.022756 | 10.121803 | 2988.533127 | 1332.559788 |
| 56 | 117 | M | 49 | 0.319535 | 0.143602 | 0.193376 | 0.846877 | 4.171980 | 1.868911 | 2.581316 | ... | -0.033465 | 0.019372 | -0.041478 | 0.004583 | -0.013913 | -0.007205 | 0.013409 | 10.286022 | 1985.712014 | 804.666593 |
54 rows × 134 columns
In [19]:
x_test
Out[19]:
| ID | Sex | Age | J1_a | J3_a | J5_a | J55_a | S1_a | S3_a | S5_a | ... | dCCi(6) | dCCi(7) | dCCi(8) | dCCi(9) | dCCi(10) | dCCi(11) | dCCi(12) | d_1 | F2_i | F2_{conv} | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 8 | 31 | M | 67 | 0.537010 | 0.243234 | 0.318075 | 1.951256 | 6.568645 | 2.891654 | 4.620345 | ... | 0.014234 | 0.013594 | -0.003722 | -0.003859 | -0.011985 | -0.027240 | 0.002325 | 8.760510 | 1840.022120 | 669.022078 |
| 39 | 49 | M | 38 | 0.176448 | 0.103346 | 0.123381 | 0.649644 | 2.649815 | 1.410593 | 1.504265 | ... | 0.016461 | 0.014798 | -0.011568 | -0.009276 | -0.003996 | -0.003248 | -0.005049 | 10.074644 | 2119.782609 | 580.338238 |
| 25 | 92 | F | 39 | 2.061820 | 1.219720 | 1.280772 | 1.306779 | 15.095252 | 7.876805 | 9.839396 | ... | 0.009492 | -0.033642 | -0.032431 | -0.056497 | -0.003210 | 0.019443 | 0.004703 | 7.986808 | 3164.536485 | 1734.620853 |
| 17 | 62 | M | 57 | 0.351191 | 0.165077 | 0.227554 | 0.849025 | 3.695872 | 1.776465 | 2.379529 | ... | 0.024865 | -0.008019 | 0.005046 | 0.023446 | -0.000726 | -0.016418 | 0.010174 | 6.031056 | 2074.091402 | 927.063276 |
| 40 | 53 | M | 60 | 0.487857 | 0.208116 | 0.282759 | 1.183502 | 4.734293 | 2.542676 | 3.053723 | ... | -0.003976 | 0.001242 | 0.005562 | -0.000695 | 0.003333 | 0.004020 | -0.005159 | 9.943562 | 2471.097222 | 1212.922300 |
| 1 | 20 | F | 57 | 0.344026 | 0.177032 | 0.206458 | 0.827714 | 1.967728 | 0.856639 | 1.179851 | ... | 0.019235 | 0.002485 | -0.004535 | -0.000225 | -0.006977 | -0.012510 | 0.014773 | 5.729322 | 1985.712014 | 561.802625 |
| 36 | 26 | M | 34 | 0.163026 | 0.094282 | 0.112654 | 0.378197 | 2.617874 | 1.361172 | 1.550565 | ... | -0.010711 | 0.006573 | 0.003950 | -0.003954 | 0.001273 | 0.005890 | 0.000385 | 8.471564 | 2700.419449 | 1293.468915 |
| 42 | 63 | F | 50 | 0.323175 | 0.126138 | 0.176197 | 0.907850 | 3.069980 | 1.372204 | 1.869070 | ... | 0.004152 | -0.001733 | 0.030566 | -0.037259 | -0.020167 | -0.042955 | -0.083977 | 9.167460 | 444.730268 | 1169.075556 |
| 43 | 65 | M | 52 | 1.529994 | 0.894926 | 0.809959 | 1.715071 | 7.627483 | 4.321696 | 4.415383 | ... | 0.015596 | -0.012218 | 0.015828 | -0.014697 | 0.010636 | 0.004654 | -0.003235 | 7.758796 | 2761.507400 | 1641.852909 |
| 0 | 8 | M | 58 | 0.321817 | 0.141230 | 0.199128 | 0.923634 | 6.044559 | 3.196477 | 3.770575 | ... | 0.016809 | -0.024467 | -0.005300 | 0.051874 | -0.037710 | -0.026549 | -0.021149 | 4.825476 | 2526.285657 | 833.498083 |
10 rows × 134 columns
In [20]:
y_train
Out[20]:
14 1 10 1 59 0 35 0 44 0 26 1 20 1 27 1 3 1 23 1 4 1 53 0 34 0 41 0 57 0 62 0 51 0 37 0 13 1 7 1 33 0 60 0 50 0 15 1 9 1 32 0 45 0 52 0 18 1 5 1 29 1 24 1 19 1 30 1 54 0 6 1 48 0 2 1 12 1 55 0 22 1 49 0 28 1 46 0 16 1 63 0 21 1 58 0 61 0 38 0 11 1 47 0 31 0 56 0 Name: Diagnosis (ALS), dtype: int64
In [21]:
y_test
Out[21]:
8 1 39 0 25 1 17 1 40 0 1 1 36 0 42 0 43 0 0 1 Name: Diagnosis (ALS), dtype: int64
In [22]:
print(r.info())
<class 'pandas.core.frame.DataFrame'> RangeIndex: 64 entries, 0 to 63 Columns: 135 entries, ID to Diagnosis (ALS) dtypes: float64(131), int64(3), object(1) memory usage: 67.6+ KB None
In [23]:
plt.figure(figsize=(20,10))
age=sns.countplot(x="Age",data=r)
for bars in age.containers:
age.bar_label(bars)
In [24]:
gender=sns.countplot(x="Sex",data=r)
for bars in gender.containers:
gender.bar_label(bars)
In [25]:
r["Sex"]=r["Sex"].replace({"M": 0,"F": 1}) ##categorical to binary
In [26]:
r["Sex"]
Out[26]:
0 0
1 1
2 1
3 1
4 0
..
59 0
60 0
61 1
62 1
63 1
Name: Sex, Length: 64, dtype: int64
In [27]:
r.head()
Out[27]:
| ID | Sex | Age | J1_a | J3_a | J5_a | J55_a | S1_a | S3_a | S5_a | ... | dCCi(7) | dCCi(8) | dCCi(9) | dCCi(10) | dCCi(11) | dCCi(12) | d_1 | F2_i | F2_{conv} | Diagnosis (ALS) | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 8 | 0 | 58 | 0.321817 | 0.141230 | 0.199128 | 0.923634 | 6.044559 | 3.196477 | 3.770575 | ... | -0.024467 | -0.005300 | 0.051874 | -0.037710 | -0.026549 | -0.021149 | 4.825476 | 2526.285657 | 833.498083 | 1 |
| 1 | 20 | 1 | 57 | 0.344026 | 0.177032 | 0.206458 | 0.827714 | 1.967728 | 0.856639 | 1.179851 | ... | 0.002485 | -0.004535 | -0.000225 | -0.006977 | -0.012510 | 0.014773 | 5.729322 | 1985.712014 | 561.802625 | 1 |
| 2 | 21 | 1 | 58 | 0.264740 | 0.148228 | 0.177078 | 0.532566 | 1.850893 | 0.942743 | 1.071950 | ... | -0.013927 | 0.007908 | 0.007960 | -0.009022 | -0.012488 | -0.015588 | 8.258488 | 2364.695972 | 796.723440 | 1 |
| 3 | 22 | 1 | 70 | 0.455793 | 0.174870 | 0.243660 | 0.962641 | 2.883768 | 1.284926 | 1.915058 | ... | -0.019285 | -0.021768 | 0.020495 | 0.035976 | -0.034648 | 0.008021 | 5.447137 | 1860.172768 | 359.409974 | 1 |
| 4 | 24 | 0 | 66 | 0.269335 | 0.143961 | 0.167465 | 0.547745 | 2.327924 | 1.164109 | 1.420891 | ... | -0.005743 | 0.004726 | -0.015247 | 0.003900 | -0.007686 | -0.003784 | 8.562517 | 2051.627447 | 817.111847 | 1 |
5 rows × 135 columns
In [28]:
stats=r.describe().T ##statistics
In [29]:
print(stats)
count mean std min 25% \
ID 64.0 67.875000 37.358198 2.000000 31.750000
Sex 64.0 0.531250 0.502967 0.000000 0.000000
Age 64.0 56.390625 10.203668 34.000000 50.750000
J1_a 64.0 0.658951 0.724002 0.098881 0.325932
J3_a 64.0 0.379242 0.435636 0.065791 0.172422
... ... ... ... ... ...
dCCi(12) 64.0 0.001269 0.020800 -0.083977 -0.006534
d_1 64.0 9.164473 2.681449 2.276702 7.604734
F2_i 64.0 2495.116475 617.755856 444.730268 2051.627447
F2_{conv} 64.0 1209.976405 553.694046 48.246203 800.181156
Diagnosis (ALS) 64.0 0.484375 0.503706 0.000000 0.000000
50% 75% max
ID 66.500000 98.250000 131.000000
Sex 1.000000 1.000000 1.000000
Age 58.000000 63.250000 80.000000
J1_a 0.458935 0.772783 5.391649
J3_a 0.253976 0.465699 3.217293
... ... ... ...
dCCi(12) 0.000661 0.009515 0.077897
d_1 9.646564 10.757522 15.420777
F2_i 2471.097222 2938.236560 3599.554394
F2_{conv} 1206.596083 1551.677678 2441.219054
Diagnosis (ALS) 0.000000 1.000000 1.000000
[135 rows x 8 columns]
In [30]:
r.mode()
Out[30]:
| ID | Sex | Age | J1_a | J3_a | J5_a | J55_a | S1_a | S3_a | S5_a | ... | dCCi(7) | dCCi(8) | dCCi(9) | dCCi(10) | dCCi(11) | dCCi(12) | d_1 | F2_i | F2_{conv} | Diagnosis (ALS) | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 24.0 | 1.0 | 60.0 | 0.098881 | 0.065791 | 0.092655 | 0.285497 | 0.883453 | 0.449099 | 0.497111 | ... | -0.052002 | -0.069202 | -0.120838 | -0.065735 | -0.042955 | -0.083977 | 2.276702 | 3515.050257 | 48.246203 | 0.0 |
| 1 | 28.0 | NaN | NaN | 0.121065 | 0.079473 | 0.101627 | 0.304952 | 1.117740 | 0.558948 | 0.615284 | ... | -0.046235 | -0.041478 | -0.065313 | -0.058987 | -0.034648 | -0.031324 | 2.512995 | NaN | 177.843734 | NaN |
| 2 | 42.0 | NaN | NaN | 0.155762 | 0.089174 | 0.112654 | 0.347673 | 1.467287 | 0.673977 | 0.819799 | ... | -0.033642 | -0.038984 | -0.064556 | -0.048113 | -0.027240 | -0.024835 | 2.986929 | NaN | 359.409974 | NaN |
| 3 | NaN | NaN | NaN | 0.163026 | 0.091831 | 0.113216 | 0.363699 | 1.575403 | 0.706004 | 0.834830 | ... | -0.030137 | -0.038739 | -0.056497 | -0.037710 | -0.026549 | -0.021879 | 4.825476 | NaN | 481.009629 | NaN |
| 4 | NaN | NaN | NaN | 0.176448 | 0.094282 | 0.117099 | 0.378197 | 1.839764 | 0.738640 | 1.071950 | ... | -0.028306 | -0.038210 | -0.040180 | -0.027345 | -0.026138 | -0.021149 | 5.218871 | NaN | 482.819916 | NaN |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 59 | NaN | NaN | NaN | 1.394205 | 0.839935 | 0.784691 | 1.821009 | 9.271523 | 4.754839 | 5.377768 | ... | 0.025207 | 0.037807 | 0.028143 | 0.016645 | 0.015248 | 0.020823 | 12.862700 | NaN | 2157.871393 | NaN |
| 60 | NaN | NaN | NaN | 1.529994 | 0.894926 | 0.809959 | 1.889785 | 9.686563 | 5.295879 | 5.687977 | ... | 0.028016 | 0.041946 | 0.040231 | 0.022928 | 0.019443 | 0.022756 | 12.874560 | NaN | 2210.936432 | NaN |
| 61 | NaN | NaN | NaN | 1.608454 | 0.989100 | 0.894706 | 1.951256 | 11.224542 | 5.707370 | 6.994751 | ... | 0.036827 | 0.042347 | 0.048771 | 0.033156 | 0.044552 | 0.034289 | 12.892692 | NaN | 2226.127951 | NaN |
| 62 | NaN | NaN | NaN | 2.061820 | 1.219720 | 1.280772 | 1.965493 | 15.095252 | 7.876805 | 9.839396 | ... | 0.056134 | 0.049357 | 0.051874 | 0.035976 | 0.055993 | 0.075071 | 14.651111 | NaN | 2284.051658 | NaN |
| 63 | NaN | NaN | NaN | 5.391649 | 3.217293 | 3.321567 | 5.991336 | 29.441589 | 16.791944 | 18.368778 | ... | 0.220533 | 0.089766 | 0.080337 | 0.057303 | 0.061026 | 0.077897 | 15.420777 | NaN | 2441.219054 | NaN |
64 rows × 135 columns
In [31]:
columns=r.columns
In [32]:
len(r.columns)
Out[32]:
135
In [33]:
135/5
Out[33]:
27.0
In [34]:
for column in r:
plt.figure(figsize=(10,4))
plt.subplot(1,2,1)
sns.histplot(r[column],bins=8,color="red",kde=True)
plt.title(f"Histogram of "+column)
plt.show()
In [35]:
for col in columns:
plt.figure(figsize=(10,4))
plt.subplot(1,2,1)
sns.boxplot(x=r[col])
plt.title("boxplot of" +col)
plt.xlabel(col)
plt.ylabel("Frequency")
plt.show()
In [36]:
corr_matrix=r.corr()
In [37]:
correlation_matrix=r.corr()
In [38]:
plt.figure(figsize=(200,200))
sns.heatmap(data=correlation_matrix,vmax=None,annot=True,fmt=".2g")
plt.title("correlation Heatmap")
plt.show()
plt.tight_layout()
<Figure size 640x480 with 0 Axes>
In [52]:
pd.crosstab(r.Sex,r['Diagnosis(ALS)'])
--------------------------------------------------------------------------- KeyError Traceback (most recent call last) File ~\anaconda3\Lib\site-packages\pandas\core\indexes\base.py:3791, in Index.get_loc(self, key) 3790 try: -> 3791 return self._engine.get_loc(casted_key) 3792 except KeyError as err: File index.pyx:152, in pandas._libs.index.IndexEngine.get_loc() File index.pyx:181, in pandas._libs.index.IndexEngine.get_loc() File pandas\_libs\hashtable_class_helper.pxi:7080, in pandas._libs.hashtable.PyObjectHashTable.get_item() File pandas\_libs\hashtable_class_helper.pxi:7088, in pandas._libs.hashtable.PyObjectHashTable.get_item() KeyError: 'Diagnosis(ALS)' The above exception was the direct cause of the following exception: KeyError Traceback (most recent call last) Cell In[52], line 1 ----> 1 pd.crosstab(r.Sex,r['Diagnosis(ALS)']) File ~\anaconda3\Lib\site-packages\pandas\core\frame.py:3893, in DataFrame.__getitem__(self, key) 3891 if self.columns.nlevels > 1: 3892 return self._getitem_multilevel(key) -> 3893 indexer = self.columns.get_loc(key) 3894 if is_integer(indexer): 3895 indexer = [indexer] File ~\anaconda3\Lib\site-packages\pandas\core\indexes\base.py:3798, in Index.get_loc(self, key) 3793 if isinstance(casted_key, slice) or ( 3794 isinstance(casted_key, abc.Iterable) 3795 and any(isinstance(x, slice) for x in casted_key) 3796 ): 3797 raise InvalidIndexError(key) -> 3798 raise KeyError(key) from err 3799 except TypeError: 3800 # If we have a listlike key, _check_indexing_error will raise 3801 # InvalidIndexError. Otherwise we fall through and re-raise 3802 # the TypeError. 3803 self._check_indexing_error(key) KeyError: 'Diagnosis(ALS)'
In [53]:
pd.crosstab(r.Age,r.Sex)
Out[53]:
| Sex | F | M |
|---|---|---|
| Age | ||
| 34 | 0 | 1 |
| 35 | 0 | 1 |
| 37 | 1 | 0 |
| 38 | 0 | 2 |
| 39 | 2 | 0 |
| 40 | 1 | 1 |
| 41 | 0 | 1 |
| 43 | 0 | 1 |
| 45 | 1 | 0 |
| 49 | 0 | 1 |
| 50 | 3 | 0 |
| 51 | 0 | 1 |
| 52 | 1 | 1 |
| 53 | 2 | 0 |
| 55 | 3 | 0 |
| 57 | 2 | 3 |
| 58 | 1 | 3 |
| 59 | 1 | 0 |
| 60 | 3 | 3 |
| 61 | 0 | 2 |
| 62 | 2 | 0 |
| 63 | 3 | 1 |
| 64 | 3 | 0 |
| 65 | 1 | 0 |
| 66 | 0 | 1 |
| 67 | 2 | 3 |
| 68 | 1 | 2 |
| 69 | 0 | 1 |
| 70 | 1 | 0 |
| 80 | 0 | 1 |
In [54]:
sns.pairplot(data=r.iloc[:,2:15],palette=dict)
Out[54]:
<seaborn.axisgrid.PairGrid at 0x24b1b7e2310>
In [55]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier,BaggingClassifier,GradientBoostingClassifier
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold,StratifiedKFold,LeaveOneOut,ShuffleSplit
from sklearn.metrics import accuracy_score,classification_report
from sklearn.model_selection import cross_val_score
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score,confusion_matrix,classification_report
In [56]:
x=r.iloc[:,:-1]
y=r.iloc[:,-1]
In [57]:
x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.2,random_state=18)
In [58]:
x_train
Out[58]:
| ID | Sex | Age | J1_a | J3_a | J5_a | J55_a | S1_a | S3_a | S5_a | ... | dCCi(6) | dCCi(7) | dCCi(8) | dCCi(9) | dCCi(10) | dCCi(11) | dCCi(12) | d_1 | F2_i | F2_{conv} | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 30 | 102 | F | 53 | 0.561542 | 0.331788 | 0.345130 | 1.020709 | 6.074875 | 2.798090 | 3.526055 | ... | 0.022664 | 0.008537 | -0.000306 | -0.012570 | -0.048113 | 0.009073 | -0.005201 | 12.036001 | 2526.285657 | 1200.269866 |
| 39 | 49 | M | 38 | 0.176448 | 0.103346 | 0.123381 | 0.649644 | 2.649815 | 1.410593 | 1.504265 | ... | 0.016461 | 0.014798 | -0.011568 | -0.009276 | -0.003996 | -0.003248 | -0.005049 | 10.074644 | 2119.782609 | 580.338238 |
| 54 | 113 | F | 62 | 0.823780 | 0.503064 | 0.497916 | 0.704065 | 6.861939 | 3.993216 | 4.174705 | ... | 0.014494 | 0.003410 | 0.000716 | -0.010791 | 0.004878 | -0.005527 | -0.001659 | 7.388631 | 2096.808356 | 737.085571 |
| 52 | 109 | F | 59 | 0.326851 | 0.215126 | 0.182667 | 0.304952 | 2.801295 | 1.669496 | 1.509331 | ... | -0.000688 | -0.018500 | 0.030142 | 0.002111 | 0.006603 | -0.013728 | -0.021879 | 8.064921 | 2700.419449 | 1228.586973 |
| 45 | 81 | F | 60 | 0.286517 | 0.176603 | 0.195712 | 0.692300 | 1.847736 | 0.869278 | 1.099434 | ... | 0.023767 | -0.020693 | 0.019078 | -0.034556 | -0.006731 | -0.025442 | -0.005668 | 12.874560 | 2670.464441 | 1309.871125 |
| 61 | 127 | F | 67 | 0.383901 | 0.245923 | 0.251359 | 0.415136 | 4.148414 | 2.069757 | 2.527213 | ... | -0.005008 | 0.011685 | 0.007883 | -0.014839 | 0.013859 | 0.011145 | 0.001418 | 12.564742 | 2526.285657 | 934.343638 |
| 28 | 98 | M | 68 | 1.076446 | 0.624824 | 0.561100 | 1.965493 | 7.001105 | 3.785627 | 4.033533 | ... | -0.011602 | 0.018274 | 0.004485 | 0.005101 | -0.007367 | 0.007041 | -0.001560 | 11.097431 | 3515.050257 | 2157.871393 |
| 58 | 121 | F | 67 | 0.237654 | 0.154117 | 0.154312 | 0.544602 | 3.583597 | 1.690501 | 1.840416 | ... | -0.027062 | -0.002917 | 0.020006 | -0.064556 | 0.014446 | 0.015248 | -0.013882 | 9.311776 | 2670.464441 | 1087.940178 |
| 23 | 80 | F | 63 | 0.504802 | 0.253832 | 0.313823 | 1.229761 | 6.571067 | 2.833840 | 4.277576 | ... | 0.013513 | -0.003382 | 0.027770 | -0.005112 | -0.027102 | 0.012612 | -0.010165 | 7.339732 | 3556.978755 | 2129.076098 |
| 33 | 6 | M | 41 | 1.063272 | 0.683918 | 0.504427 | 0.613402 | 3.176717 | 1.870164 | 1.590798 | ... | 0.017293 | -0.008880 | -0.001511 | 0.003965 | 0.015240 | -0.006500 | -0.008723 | 10.482453 | 2263.284796 | 801.333727 |
| 41 | 61 | F | 37 | 0.818954 | 0.484998 | 0.511515 | 0.721296 | 4.942091 | 2.724820 | 3.320166 | ... | 0.005945 | 0.021267 | -0.008438 | 0.007339 | -0.004838 | -0.004133 | 0.000937 | 10.151495 | 2444.009071 | 1157.993834 |
| 15 | 55 | M | 61 | 1.177795 | 0.730069 | 0.569287 | 1.542224 | 5.883227 | 3.384535 | 2.875918 | ... | 0.028733 | -0.004708 | -0.004383 | 0.006398 | -0.014412 | -0.005085 | 0.005530 | 7.148809 | 2051.627447 | 784.563460 |
| 31 | 2 | F | 64 | 0.219429 | 0.144385 | 0.171661 | 0.555528 | 2.054277 | 1.108746 | 1.260472 | ... | -0.011558 | -0.004168 | 0.018603 | 0.019350 | 0.016342 | 0.014127 | 0.022756 | 10.121803 | 2988.533127 | 1332.559788 |
| 27 | 96 | F | 52 | 0.475047 | 0.308491 | 0.266091 | 0.396462 | 3.686641 | 1.999494 | 2.083195 | ... | 0.009989 | 0.000009 | 0.042347 | -0.003170 | 0.002103 | 0.011023 | -0.031324 | 11.597077 | 2792.655884 | 1457.933269 |
| 26 | 94 | F | 55 | 0.764443 | 0.423008 | 0.472764 | 1.699466 | 8.254286 | 3.966819 | 5.010081 | ... | 0.018794 | 0.036827 | 0.029426 | -0.024056 | -0.065735 | 0.012986 | 0.012941 | 9.482607 | 2471.097222 | 1019.521207 |
| 59 | 123 | M | 43 | 0.255799 | 0.123679 | 0.182658 | 0.505591 | 6.222031 | 2.876602 | 3.894294 | ... | -0.060395 | 0.220533 | 0.089766 | -0.120838 | -0.004221 | -0.013165 | 0.004642 | 9.855665 | 3128.341308 | 1990.937097 |
| 35 | 24 | M | 60 | 0.391222 | 0.200687 | 0.232286 | 0.638498 | 2.324343 | 1.181015 | 1.475889 | ... | -0.005405 | -0.006063 | 0.028443 | -0.006618 | 0.003011 | -0.005547 | -0.002806 | 10.525555 | 2700.419449 | 1393.549002 |
| 40 | 53 | M | 60 | 0.487857 | 0.208116 | 0.282759 | 1.183502 | 4.734293 | 2.542676 | 3.053723 | ... | -0.003976 | 0.001242 | 0.005562 | -0.000695 | 0.003333 | 0.004020 | -0.005159 | 9.943562 | 2471.097222 | 1212.922300 |
| 29 | 100 | M | 69 | 0.511742 | 0.313666 | 0.320677 | 0.431433 | 7.417198 | 4.258109 | 4.961685 | ... | -0.013859 | 0.000308 | -0.038984 | 0.048771 | -0.014493 | 0.008647 | -0.004524 | 2.512995 | 1233.583584 | 48.246203 |
| 0 | 8 | M | 58 | 0.321817 | 0.141230 | 0.199128 | 0.923634 | 6.044559 | 3.196477 | 3.770575 | ... | 0.016809 | -0.024467 | -0.005300 | 0.051874 | -0.037710 | -0.026549 | -0.021149 | 4.825476 | 2526.285657 | 833.498083 |
| 18 | 64 | M | 57 | 0.426554 | 0.202661 | 0.255198 | 0.885479 | 4.387137 | 1.892587 | 2.628737 | ... | 0.003446 | 0.010420 | 0.005288 | -0.006713 | 0.013147 | -0.017816 | 0.017087 | 11.136041 | 2238.670803 | 930.223353 |
| 55 | 115 | F | 50 | 0.462076 | 0.269359 | 0.296952 | 0.909990 | 5.020638 | 2.305690 | 2.837745 | ... | 0.001699 | -0.001674 | 0.012043 | -0.003015 | 0.022928 | -0.005444 | -0.006394 | 14.651111 | 3515.050257 | 2210.936432 |
| 13 | 48 | F | 63 | 0.805433 | 0.335434 | 0.487871 | 1.813700 | 4.485662 | 2.192828 | 2.864096 | ... | 0.005770 | -0.028306 | -0.035823 | -0.004287 | -0.014985 | -0.004156 | -0.005061 | 7.029500 | 2730.764545 | 1288.920905 |
| 57 | 119 | F | 65 | 0.356684 | 0.233194 | 0.242305 | 0.410246 | 3.251335 | 1.867689 | 1.908461 | ... | 0.005494 | -0.003761 | 0.006982 | 0.012774 | 0.008384 | 0.002823 | -0.014323 | 10.889792 | 2471.097222 | 1549.074209 |
| 6 | 27 | M | 57 | 0.691093 | 0.406901 | 0.406287 | 0.765986 | 6.168256 | 3.702088 | 3.286232 | ... | 0.002661 | -0.012605 | 0.013385 | 0.004513 | 0.001568 | -0.008244 | 0.005801 | 5.945219 | 2313.388825 | 1219.744513 |
| 14 | 52 | F | 62 | 1.394205 | 0.839935 | 0.703224 | 1.362995 | 4.661601 | 2.402909 | 2.826132 | ... | -0.000823 | 0.025207 | -0.002524 | -0.006103 | -0.012850 | -0.015023 | 0.019711 | 12.862700 | 3092.653846 | 1552.852150 |
| 51 | 107 | M | 80 | 0.532330 | 0.296012 | 0.322217 | 0.650045 | 8.042171 | 4.754839 | 5.060159 | ... | -0.014078 | 0.010696 | -0.003448 | -0.015049 | 0.010195 | -0.002152 | 0.001881 | 9.472404 | 2238.670803 | 1241.852695 |
| 9 | 32 | M | 61 | 0.387730 | 0.213745 | 0.249993 | 0.591160 | 3.351240 | 1.874979 | 2.075762 | ... | 0.004241 | 0.006956 | -0.002013 | -0.000082 | 0.008275 | -0.013829 | -0.006955 | 7.572111 | 1780.825796 | 838.978523 |
| 3 | 22 | F | 70 | 0.455793 | 0.174870 | 0.243660 | 0.962641 | 2.883768 | 1.284926 | 1.915058 | ... | 0.013213 | -0.019285 | -0.021768 | 0.020495 | 0.035976 | -0.034648 | 0.008021 | 5.447137 | 1860.172768 | 359.409974 |
| 1 | 20 | F | 57 | 0.344026 | 0.177032 | 0.206458 | 0.827714 | 1.967728 | 0.856639 | 1.179851 | ... | 0.019235 | 0.002485 | -0.004535 | -0.000225 | -0.006977 | -0.012510 | 0.014773 | 5.729322 | 1985.712014 | 561.802625 |
| 43 | 65 | M | 52 | 1.529994 | 0.894926 | 0.809959 | 1.715071 | 7.627483 | 4.321696 | 4.415383 | ... | 0.015596 | -0.012218 | 0.015828 | -0.014697 | 0.010636 | 0.004654 | -0.003235 | 7.758796 | 2761.507400 | 1641.852909 |
| 4 | 24 | M | 66 | 0.269335 | 0.143961 | 0.167465 | 0.547745 | 2.327924 | 1.164109 | 1.420891 | ... | 0.002948 | -0.005743 | 0.004726 | -0.015247 | 0.003900 | -0.007686 | -0.003784 | 8.562517 | 2051.627447 | 817.111847 |
| 36 | 26 | M | 34 | 0.163026 | 0.094282 | 0.112654 | 0.378197 | 2.617874 | 1.361172 | 1.550565 | ... | -0.010711 | 0.006573 | 0.003950 | -0.003954 | 0.001273 | 0.005890 | 0.000385 | 8.471564 | 2700.419449 | 1293.468915 |
| 11 | 42 | M | 67 | 1.608454 | 0.989100 | 0.894706 | 1.304613 | 6.031953 | 3.279233 | 3.729620 | ... | 0.019701 | 0.004293 | 0.026891 | 0.080337 | -0.007352 | 0.055993 | 0.075071 | 10.674106 | 2007.441819 | 481.009629 |
| 10 | 39 | M | 67 | 1.232990 | 0.561293 | 0.784691 | 1.889785 | 4.139457 | 1.915796 | 2.692091 | ... | 0.031624 | -0.018089 | 0.018493 | 0.006547 | -0.004817 | -0.003710 | -0.000692 | 2.276702 | 1686.160000 | 669.461749 |
| 63 | 131 | F | 60 | 0.916706 | 0.566121 | 0.512857 | 1.467165 | 6.372832 | 3.251168 | 3.539229 | ... | -0.072828 | -0.046235 | 0.041946 | -0.065313 | -0.016682 | 0.061026 | -0.005883 | 6.972152 | 2792.655884 | 1518.529172 |
| 47 | 86 | F | 63 | 0.753234 | 0.467912 | 0.442792 | 0.540327 | 4.801768 | 2.677700 | 3.241265 | ... | -0.018937 | 0.011239 | 0.011488 | 0.015334 | 0.008337 | 0.005474 | 0.011687 | 10.515820 | 3057.463491 | 1494.054076 |
| 21 | 76 | M | 68 | 0.379367 | 0.223560 | 0.308426 | 1.821009 | 5.539475 | 2.551095 | 3.000675 | ... | 0.008846 | -0.006281 | -0.004169 | -0.008164 | 0.012667 | -0.015442 | 0.001157 | 5.218871 | 1649.621788 | 177.843734 |
| 60 | 125 | M | 63 | 0.513175 | 0.296489 | 0.334845 | 0.729804 | 9.686563 | 4.327943 | 5.687977 | ... | -0.046223 | 0.028016 | -0.038739 | 0.011588 | -0.011281 | -0.004294 | 0.011239 | 11.094558 | 1964.218942 | 601.076046 |
| 17 | 62 | M | 57 | 0.351191 | 0.165077 | 0.227554 | 0.849025 | 3.695872 | 1.776465 | 2.379529 | ... | 0.024865 | -0.008019 | 0.005046 | 0.023446 | -0.000726 | -0.016418 | 0.010174 | 6.031056 | 2074.091402 | 927.063276 |
| 34 | 16 | M | 38 | 0.415366 | 0.254119 | 0.243906 | 0.449362 | 2.394697 | 1.292778 | 1.468358 | ... | 0.000016 | 0.008047 | -0.005823 | -0.006588 | -0.000030 | 0.003137 | -0.002440 | 8.531677 | 3515.050257 | 2441.219054 |
| 24 | 84 | F | 55 | 0.419330 | 0.255329 | 0.338319 | 0.486463 | 4.366452 | 1.932596 | 2.855240 | ... | -0.005714 | 0.012337 | -0.005475 | -0.001179 | -0.002687 | 0.005369 | 0.034289 | 11.766102 | 2888.617021 | 1839.961952 |
| 8 | 31 | M | 67 | 0.537010 | 0.243234 | 0.318075 | 1.951256 | 6.568645 | 2.891654 | 4.620345 | ... | 0.014234 | 0.013594 | -0.003722 | -0.003859 | -0.011985 | -0.027240 | 0.002325 | 8.760510 | 1840.022120 | 669.022078 |
| 2 | 21 | F | 58 | 0.264740 | 0.148228 | 0.177078 | 0.532566 | 1.850893 | 0.942743 | 1.071950 | ... | 0.007199 | -0.013927 | 0.007908 | 0.007960 | -0.009022 | -0.012488 | -0.015588 | 8.258488 | 2364.695972 | 796.723440 |
| 50 | 99 | F | 57 | 0.098881 | 0.065791 | 0.092655 | 0.363699 | 0.883453 | 0.449099 | 0.497111 | ... | 0.015152 | -0.027019 | 0.037807 | -0.019812 | 0.000436 | -0.002840 | -0.013161 | 10.108459 | 2143.018556 | 805.694015 |
| 46 | 85 | F | 55 | 0.454844 | 0.289458 | 0.293609 | 0.582757 | 2.124786 | 1.170689 | 1.182154 | ... | 0.006111 | 0.003214 | 0.007520 | -0.024862 | 0.016600 | -0.015827 | 0.002088 | 12.189059 | 3092.653846 | 1551.286187 |
| 49 | 97 | F | 39 | 0.463874 | 0.290374 | 0.278926 | 0.550290 | 2.449028 | 1.372602 | 1.412486 | ... | -0.000234 | 0.011520 | 0.009453 | 0.002072 | -0.002811 | 0.004577 | -0.009058 | 10.616077 | 3164.536485 | 2037.766311 |
| 5 | 25 | M | 51 | 0.339593 | 0.182070 | 0.204186 | 0.505987 | 1.969217 | 0.834783 | 1.208688 | ... | 0.009548 | -0.005489 | 0.010164 | -0.008341 | -0.006132 | 0.005441 | 0.003568 | 9.810520 | 2143.018556 | 1004.727725 |
| 56 | 117 | M | 49 | 0.319535 | 0.143602 | 0.193376 | 0.846877 | 4.171980 | 1.868911 | 2.581316 | ... | -0.033465 | 0.019372 | -0.041478 | 0.004583 | -0.013913 | -0.007205 | 0.013409 | 10.286022 | 1985.712014 | 804.666593 |
| 19 | 68 | M | 40 | 0.496922 | 0.213975 | 0.293201 | 0.957065 | 3.293700 | 1.672811 | 1.894214 | ... | 0.023989 | -0.030137 | 0.024769 | 0.000567 | 0.005071 | -0.023693 | 0.009770 | 7.872279 | 2143.018556 | 985.160918 |
| 42 | 63 | F | 50 | 0.323175 | 0.126138 | 0.176197 | 0.907850 | 3.069980 | 1.372204 | 1.869070 | ... | 0.004152 | -0.001733 | 0.030566 | -0.037259 | -0.020167 | -0.042955 | -0.083977 | 9.167460 | 444.730268 | 1169.075556 |
51 rows × 134 columns
In [59]:
x_test
Out[59]:
| ID | Sex | Age | J1_a | J3_a | J5_a | J55_a | S1_a | S3_a | S5_a | ... | dCCi(6) | dCCi(7) | dCCi(8) | dCCi(9) | dCCi(10) | dCCi(11) | dCCi(12) | d_1 | F2_i | F2_{conv} | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 48 | 89 | F | 45 | 0.155762 | 0.091831 | 0.117099 | 0.575170 | 1.575403 | 0.738640 | 0.834830 | ... | 0.002348 | -0.002313 | 0.008766 | 0.006330 | 0.016645 | -0.003417 | 0.002115 | 10.713432 | 3599.554394 | 2226.127951 |
| 38 | 42 | M | 60 | 0.182721 | 0.089174 | 0.113216 | 0.651748 | 1.839764 | 0.673977 | 1.084464 | ... | 0.021454 | 0.007635 | 0.019134 | 0.013320 | 0.002002 | -0.022146 | 0.005825 | 15.420777 | 2313.388825 | 1381.628235 |
| 53 | 111 | F | 60 | 0.121065 | 0.079473 | 0.101627 | 0.285497 | 1.117740 | 0.558948 | 0.615284 | ... | -0.005255 | 0.017626 | -0.007311 | -0.006541 | -0.012625 | 0.013987 | 0.014222 | 11.261019 | 3201.250289 | 1960.299963 |
| 7 | 28 | M | 58 | 1.210548 | 0.726523 | 0.661670 | 1.205596 | 8.492104 | 4.109625 | 5.377768 | ... | 0.007534 | 0.017088 | 0.015907 | -0.006379 | 0.057303 | -0.001919 | -0.008007 | 8.422353 | 1964.218942 | 759.068477 |
| 22 | 78 | F | 64 | 0.437128 | 0.265519 | 0.349086 | 0.493971 | 3.205305 | 1.515726 | 2.119909 | ... | -0.060397 | 0.056134 | -0.069202 | -0.040180 | -0.058987 | 0.044552 | 0.077897 | 2.986929 | 1800.351911 | 784.205580 |
| 12 | 46 | F | 50 | 0.199868 | 0.132266 | 0.157546 | 0.454599 | 1.467287 | 0.706004 | 0.819799 | ... | 0.001819 | 0.011291 | -0.001916 | 0.001513 | -0.002502 | 0.002014 | 0.010446 | 10.950821 | 3164.536485 | 1553.425003 |
| 25 | 92 | F | 39 | 2.061820 | 1.219720 | 1.280772 | 1.306779 | 15.095252 | 7.876805 | 9.839396 | ... | 0.009492 | -0.033642 | -0.032431 | -0.056497 | -0.003210 | 0.019443 | 0.004703 | 7.986808 | 3164.536485 | 1734.620853 |
| 44 | 77 | F | 40 | 0.349185 | 0.213207 | 0.227957 | 0.347673 | 3.395795 | 1.826527 | 2.123809 | ... | -0.042489 | -0.021018 | 0.049357 | 0.005870 | -0.027345 | -0.026138 | 0.009430 | 10.594026 | 3515.050257 | 1925.062482 |
| 32 | 4 | F | 53 | 0.598550 | 0.350577 | 0.378646 | 0.654399 | 4.224992 | 2.312947 | 2.678596 | ... | 0.001768 | -0.003924 | 0.001995 | 0.004019 | -0.007207 | 0.004614 | 0.003154 | 7.615608 | 2921.471038 | 1230.982918 |
| 20 | 72 | F | 64 | 0.797801 | 0.470116 | 0.522910 | 1.663179 | 7.778152 | 3.640445 | 4.355894 | ... | 0.028207 | -0.052002 | -0.038210 | 0.028143 | 0.033156 | 0.010924 | 0.020823 | 5.924309 | 1880.538263 | 482.819916 |
| 37 | 28 | M | 35 | 0.758571 | 0.464961 | 0.413086 | 0.605323 | 9.271523 | 5.707370 | 5.150374 | ... | -0.003686 | 0.018992 | 0.034957 | 0.016890 | -0.009376 | -0.008998 | -0.007465 | 12.892692 | 2888.617021 | 1653.804580 |
| 16 | 58 | M | 58 | 5.391649 | 3.217293 | 3.321567 | 5.991336 | 29.441589 | 16.791944 | 18.368778 | ... | -0.002691 | 0.008994 | 0.025390 | 0.040231 | 0.003503 | -0.006546 | -0.024835 | 8.008742 | 1921.927690 | 583.380671 |
| 62 | 129 | F | 68 | 1.336216 | 0.815757 | 0.733197 | 0.981928 | 11.224542 | 5.295879 | 6.994751 | ... | 0.025679 | 0.015712 | 0.013437 | 0.025113 | 0.008852 | -0.010132 | -0.008458 | 10.670669 | 3201.250289 | 2284.051658 |
13 rows × 134 columns
In [60]:
y_train
Out[60]:
30 1 39 0 54 0 52 0 45 0 61 0 28 1 58 0 23 1 33 0 41 0 15 1 31 0 27 1 26 1 59 0 35 0 40 0 29 1 0 1 18 1 55 0 13 1 57 0 6 1 14 1 51 0 9 1 3 1 1 1 43 0 4 1 36 0 11 1 10 1 63 0 47 0 21 1 60 0 17 1 34 0 24 1 8 1 2 1 50 0 46 0 49 0 5 1 56 0 19 1 42 0 Name: Diagnosis (ALS), dtype: int64
In [61]:
y_test
Out[61]:
48 0 38 0 53 0 7 1 22 1 12 1 25 1 44 0 32 0 20 1 37 0 16 1 62 0 Name: Diagnosis (ALS), dtype: int64
In [62]:
y_test.shape
Out[62]:
(13,)
ONE HOT ENCODER
In [63]:
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
In [64]:
categorical_cols=x_train.select_dtypes(include=["object"]).columns
print("CATEGORICAL COLUMNS :",categorical_cols)
numeric_cols=x_train.select_dtypes(include=["number"]).columns
numeric_cols=r[["Diagnosis (ALS)",'Age']]
print("NUMERIC COLUMNS:",numeric_cols)
CATEGORICAL COLUMNS : Index(['Sex'], dtype='object') NUMERIC COLUMNS: Diagnosis (ALS) Age 0 1 58 1 1 57 2 1 58 3 1 70 4 1 66 .. ... ... 59 0 43 60 0 63 61 0 67 62 0 68 63 0 60 [64 rows x 2 columns]
In [65]:
r.describe()
Out[65]:
| ID | Age | J1_a | J3_a | J5_a | J55_a | S1_a | S3_a | S5_a | S11_a | ... | dCCi(7) | dCCi(8) | dCCi(9) | dCCi(10) | dCCi(11) | dCCi(12) | d_1 | F2_i | F2_{conv} | Diagnosis (ALS) | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| count | 64.000000 | 64.000000 | 64.000000 | 64.000000 | 64.000000 | 64.000000 | 64.000000 | 64.000000 | 64.000000 | 64.000000 | ... | 64.000000 | 64.000000 | 64.000000 | 64.000000 | 64.000000 | 64.000000 | 64.000000 | 64.000000 | 64.000000 | 64.000000 |
| mean | 67.875000 | 56.390625 | 0.658951 | 0.379242 | 0.395886 | 0.945496 | 5.072071 | 2.617924 | 3.078959 | 4.393513 | ... | 0.003393 | 0.006224 | -0.002889 | -0.002152 | -0.001969 | 0.001269 | 9.164473 | 2495.116475 | 1209.976405 | 0.484375 |
| std | 37.358198 | 10.203668 | 0.724002 | 0.435636 | 0.431926 | 0.791558 | 4.087221 | 2.289347 | 2.568471 | 3.327968 | ... | 0.033268 | 0.024670 | 0.028315 | 0.019542 | 0.018012 | 0.020800 | 2.681449 | 617.755856 | 553.694046 | 0.503706 |
| min | 2.000000 | 34.000000 | 0.098881 | 0.065791 | 0.092655 | 0.285497 | 0.883453 | 0.449099 | 0.497111 | 0.804179 | ... | -0.052002 | -0.069202 | -0.120838 | -0.065735 | -0.042955 | -0.083977 | 2.276702 | 444.730268 | 48.246203 | 0.000000 |
| 25% | 31.750000 | 50.750000 | 0.325932 | 0.172422 | 0.198274 | 0.538387 | 2.641830 | 1.369446 | 1.508064 | 2.123761 | ... | -0.008234 | -0.004222 | -0.009654 | -0.009853 | -0.012674 | -0.006534 | 7.604734 | 2051.627447 | 800.181156 | 0.000000 |
| 50% | 66.500000 | 58.000000 | 0.458935 | 0.253976 | 0.293405 | 0.698183 | 4.198486 | 1.966045 | 2.653666 | 3.634994 | ... | 0.000775 | 0.006272 | -0.000937 | -0.001614 | -0.003921 | 0.000661 | 9.646564 | 2471.097222 | 1206.596083 | 0.000000 |
| 75% | 98.250000 | 63.250000 | 0.772783 | 0.465699 | 0.476541 | 1.189025 | 6.259731 | 3.258184 | 3.801504 | 5.644808 | ... | 0.011561 | 0.019092 | 0.006745 | 0.008349 | 0.005578 | 0.009515 | 10.757522 | 2938.236560 | 1551.677678 | 1.000000 |
| max | 131.000000 | 80.000000 | 5.391649 | 3.217293 | 3.321567 | 5.991336 | 29.441589 | 16.791944 | 18.368778 | 20.750202 | ... | 0.220533 | 0.089766 | 0.080337 | 0.057303 | 0.061026 | 0.077897 | 15.420777 | 3599.554394 | 2441.219054 | 1.000000 |
8 rows × 134 columns
In [66]:
r.mode()
Out[66]:
| ID | Sex | Age | J1_a | J3_a | J5_a | J55_a | S1_a | S3_a | S5_a | ... | dCCi(7) | dCCi(8) | dCCi(9) | dCCi(10) | dCCi(11) | dCCi(12) | d_1 | F2_i | F2_{conv} | Diagnosis (ALS) | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 24.0 | F | 60.0 | 0.098881 | 0.065791 | 0.092655 | 0.285497 | 0.883453 | 0.449099 | 0.497111 | ... | -0.052002 | -0.069202 | -0.120838 | -0.065735 | -0.042955 | -0.083977 | 2.276702 | 3515.050257 | 48.246203 | 0.0 |
| 1 | 28.0 | NaN | NaN | 0.121065 | 0.079473 | 0.101627 | 0.304952 | 1.117740 | 0.558948 | 0.615284 | ... | -0.046235 | -0.041478 | -0.065313 | -0.058987 | -0.034648 | -0.031324 | 2.512995 | NaN | 177.843734 | NaN |
| 2 | 42.0 | NaN | NaN | 0.155762 | 0.089174 | 0.112654 | 0.347673 | 1.467287 | 0.673977 | 0.819799 | ... | -0.033642 | -0.038984 | -0.064556 | -0.048113 | -0.027240 | -0.024835 | 2.986929 | NaN | 359.409974 | NaN |
| 3 | NaN | NaN | NaN | 0.163026 | 0.091831 | 0.113216 | 0.363699 | 1.575403 | 0.706004 | 0.834830 | ... | -0.030137 | -0.038739 | -0.056497 | -0.037710 | -0.026549 | -0.021879 | 4.825476 | NaN | 481.009629 | NaN |
| 4 | NaN | NaN | NaN | 0.176448 | 0.094282 | 0.117099 | 0.378197 | 1.839764 | 0.738640 | 1.071950 | ... | -0.028306 | -0.038210 | -0.040180 | -0.027345 | -0.026138 | -0.021149 | 5.218871 | NaN | 482.819916 | NaN |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 59 | NaN | NaN | NaN | 1.394205 | 0.839935 | 0.784691 | 1.821009 | 9.271523 | 4.754839 | 5.377768 | ... | 0.025207 | 0.037807 | 0.028143 | 0.016645 | 0.015248 | 0.020823 | 12.862700 | NaN | 2157.871393 | NaN |
| 60 | NaN | NaN | NaN | 1.529994 | 0.894926 | 0.809959 | 1.889785 | 9.686563 | 5.295879 | 5.687977 | ... | 0.028016 | 0.041946 | 0.040231 | 0.022928 | 0.019443 | 0.022756 | 12.874560 | NaN | 2210.936432 | NaN |
| 61 | NaN | NaN | NaN | 1.608454 | 0.989100 | 0.894706 | 1.951256 | 11.224542 | 5.707370 | 6.994751 | ... | 0.036827 | 0.042347 | 0.048771 | 0.033156 | 0.044552 | 0.034289 | 12.892692 | NaN | 2226.127951 | NaN |
| 62 | NaN | NaN | NaN | 2.061820 | 1.219720 | 1.280772 | 1.965493 | 15.095252 | 7.876805 | 9.839396 | ... | 0.056134 | 0.049357 | 0.051874 | 0.035976 | 0.055993 | 0.075071 | 14.651111 | NaN | 2284.051658 | NaN |
| 63 | NaN | NaN | NaN | 5.391649 | 3.217293 | 3.321567 | 5.991336 | 29.441589 | 16.791944 | 18.368778 | ... | 0.220533 | 0.089766 | 0.080337 | 0.057303 | 0.061026 | 0.077897 | 15.420777 | NaN | 2441.219054 | NaN |
64 rows × 135 columns
In [67]:
r.corr
Out[67]:
<bound method DataFrame.corr of ID Sex Age J1_a J3_a J5_a J55_a S1_a S3_a \
0 8 M 58 0.321817 0.141230 0.199128 0.923634 6.044559 3.196477
1 20 F 57 0.344026 0.177032 0.206458 0.827714 1.967728 0.856639
2 21 F 58 0.264740 0.148228 0.177078 0.532566 1.850893 0.942743
3 22 F 70 0.455793 0.174870 0.243660 0.962641 2.883768 1.284926
4 24 M 66 0.269335 0.143961 0.167465 0.547745 2.327924 1.164109
.. ... .. ... ... ... ... ... ... ...
59 123 M 43 0.255799 0.123679 0.182658 0.505591 6.222031 2.876602
60 125 M 63 0.513175 0.296489 0.334845 0.729804 9.686563 4.327943
61 127 F 67 0.383901 0.245923 0.251359 0.415136 4.148414 2.069757
62 129 F 68 1.336216 0.815757 0.733197 0.981928 11.224542 5.295879
63 131 F 60 0.916706 0.566121 0.512857 1.467165 6.372832 3.251168
S5_a ... dCCi(7) dCCi(8) dCCi(9) dCCi(10) dCCi(11) dCCi(12) \
0 3.770575 ... -0.024467 -0.005300 0.051874 -0.037710 -0.026549 -0.021149
1 1.179851 ... 0.002485 -0.004535 -0.000225 -0.006977 -0.012510 0.014773
2 1.071950 ... -0.013927 0.007908 0.007960 -0.009022 -0.012488 -0.015588
3 1.915058 ... -0.019285 -0.021768 0.020495 0.035976 -0.034648 0.008021
4 1.420891 ... -0.005743 0.004726 -0.015247 0.003900 -0.007686 -0.003784
.. ... ... ... ... ... ... ... ...
59 3.894294 ... 0.220533 0.089766 -0.120838 -0.004221 -0.013165 0.004642
60 5.687977 ... 0.028016 -0.038739 0.011588 -0.011281 -0.004294 0.011239
61 2.527213 ... 0.011685 0.007883 -0.014839 0.013859 0.011145 0.001418
62 6.994751 ... 0.015712 0.013437 0.025113 0.008852 -0.010132 -0.008458
63 3.539229 ... -0.046235 0.041946 -0.065313 -0.016682 0.061026 -0.005883
d_1 F2_i F2_{conv} Diagnosis (ALS)
0 4.825476 2526.285657 833.498083 1
1 5.729322 1985.712014 561.802625 1
2 8.258488 2364.695972 796.723440 1
3 5.447137 1860.172768 359.409974 1
4 8.562517 2051.627447 817.111847 1
.. ... ... ... ...
59 9.855665 3128.341308 1990.937097 0
60 11.094558 1964.218942 601.076046 0
61 12.564742 2526.285657 934.343638 0
62 10.670669 3201.250289 2284.051658 0
63 6.972152 2792.655884 1518.529172 0
[64 rows x 135 columns]>
In [68]:
preprocessor=ColumnTransformer(
transformers=[
('dog',
OneHotEncoder(handle_unknown="ignore"),
categorical_cols)
],
remainder="passthrough"
)
In [69]:
x_train_transformed=preprocessor.fit_transform(x_train)
In [70]:
x_train_transformed
Out[70]:
array([[1.00000000e+00, 0.00000000e+00, 1.02000000e+02, ...,
1.20360009e+01, 2.52628566e+03, 1.20026987e+03],
[0.00000000e+00, 1.00000000e+00, 4.90000000e+01, ...,
1.00746439e+01, 2.11978261e+03, 5.80338238e+02],
[1.00000000e+00, 0.00000000e+00, 1.13000000e+02, ...,
7.38863107e+00, 2.09680836e+03, 7.37085571e+02],
...,
[0.00000000e+00, 1.00000000e+00, 1.17000000e+02, ...,
1.02860219e+01, 1.98571201e+03, 8.04666593e+02],
[0.00000000e+00, 1.00000000e+00, 6.80000000e+01, ...,
7.87227878e+00, 2.14301856e+03, 9.85160918e+02],
[1.00000000e+00, 0.00000000e+00, 6.30000000e+01, ...,
9.16746005e+00, 4.44730268e+02, 1.16907556e+03]])
In [71]:
one_hot_encoded_columns=preprocessor.named_transformers_['dog'].get_feature_names_out(categorical_cols)
In [72]:
all_columns=list(one_hot_encoded_columns) + [col for col in x_train.columns if col not in categorical_cols]
In [74]:
x_train_transformed_df=pd.DataFrame(x_train_transformed, columns=all_columns)
one_hot_encoded_columns
Out[74]:
array(['Sex_F', 'Sex_M'], dtype=object)
In [75]:
x_train_transformed_df
Out[75]:
| Sex_F | Sex_M | ID | Age | J1_a | J3_a | J5_a | J55_a | S1_a | S3_a | ... | dCCi(6) | dCCi(7) | dCCi(8) | dCCi(9) | dCCi(10) | dCCi(11) | dCCi(12) | d_1 | F2_i | F2_{conv} | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1.0 | 0.0 | 102.0 | 53.0 | 0.561542 | 0.331788 | 0.345130 | 1.020709 | 6.074875 | 2.798090 | ... | 0.022664 | 0.008537 | -0.000306 | -0.012570 | -0.048113 | 0.009073 | -0.005201 | 12.036001 | 2526.285657 | 1200.269866 |
| 1 | 0.0 | 1.0 | 49.0 | 38.0 | 0.176448 | 0.103346 | 0.123381 | 0.649644 | 2.649815 | 1.410593 | ... | 0.016461 | 0.014798 | -0.011568 | -0.009276 | -0.003996 | -0.003248 | -0.005049 | 10.074644 | 2119.782609 | 580.338238 |
| 2 | 1.0 | 0.0 | 113.0 | 62.0 | 0.823780 | 0.503064 | 0.497916 | 0.704065 | 6.861939 | 3.993216 | ... | 0.014494 | 0.003410 | 0.000716 | -0.010791 | 0.004878 | -0.005527 | -0.001659 | 7.388631 | 2096.808356 | 737.085571 |
| 3 | 1.0 | 0.0 | 109.0 | 59.0 | 0.326851 | 0.215126 | 0.182667 | 0.304952 | 2.801295 | 1.669496 | ... | -0.000688 | -0.018500 | 0.030142 | 0.002111 | 0.006603 | -0.013728 | -0.021879 | 8.064921 | 2700.419449 | 1228.586973 |
| 4 | 1.0 | 0.0 | 81.0 | 60.0 | 0.286517 | 0.176603 | 0.195712 | 0.692300 | 1.847736 | 0.869278 | ... | 0.023767 | -0.020693 | 0.019078 | -0.034556 | -0.006731 | -0.025442 | -0.005668 | 12.874560 | 2670.464441 | 1309.871125 |
| 5 | 1.0 | 0.0 | 127.0 | 67.0 | 0.383901 | 0.245923 | 0.251359 | 0.415136 | 4.148414 | 2.069757 | ... | -0.005008 | 0.011685 | 0.007883 | -0.014839 | 0.013859 | 0.011145 | 0.001418 | 12.564742 | 2526.285657 | 934.343638 |
| 6 | 0.0 | 1.0 | 98.0 | 68.0 | 1.076446 | 0.624824 | 0.561100 | 1.965493 | 7.001105 | 3.785627 | ... | -0.011602 | 0.018274 | 0.004485 | 0.005101 | -0.007367 | 0.007041 | -0.001560 | 11.097431 | 3515.050257 | 2157.871393 |
| 7 | 1.0 | 0.0 | 121.0 | 67.0 | 0.237654 | 0.154117 | 0.154312 | 0.544602 | 3.583597 | 1.690501 | ... | -0.027062 | -0.002917 | 0.020006 | -0.064556 | 0.014446 | 0.015248 | -0.013882 | 9.311776 | 2670.464441 | 1087.940178 |
| 8 | 1.0 | 0.0 | 80.0 | 63.0 | 0.504802 | 0.253832 | 0.313823 | 1.229761 | 6.571067 | 2.833840 | ... | 0.013513 | -0.003382 | 0.027770 | -0.005112 | -0.027102 | 0.012612 | -0.010165 | 7.339732 | 3556.978755 | 2129.076098 |
| 9 | 0.0 | 1.0 | 6.0 | 41.0 | 1.063272 | 0.683918 | 0.504427 | 0.613402 | 3.176717 | 1.870164 | ... | 0.017293 | -0.008880 | -0.001511 | 0.003965 | 0.015240 | -0.006500 | -0.008723 | 10.482453 | 2263.284796 | 801.333727 |
| 10 | 1.0 | 0.0 | 61.0 | 37.0 | 0.818954 | 0.484998 | 0.511515 | 0.721296 | 4.942091 | 2.724820 | ... | 0.005945 | 0.021267 | -0.008438 | 0.007339 | -0.004838 | -0.004133 | 0.000937 | 10.151495 | 2444.009071 | 1157.993834 |
| 11 | 0.0 | 1.0 | 55.0 | 61.0 | 1.177795 | 0.730069 | 0.569287 | 1.542224 | 5.883227 | 3.384535 | ... | 0.028733 | -0.004708 | -0.004383 | 0.006398 | -0.014412 | -0.005085 | 0.005530 | 7.148809 | 2051.627447 | 784.563460 |
| 12 | 1.0 | 0.0 | 2.0 | 64.0 | 0.219429 | 0.144385 | 0.171661 | 0.555528 | 2.054277 | 1.108746 | ... | -0.011558 | -0.004168 | 0.018603 | 0.019350 | 0.016342 | 0.014127 | 0.022756 | 10.121803 | 2988.533127 | 1332.559788 |
| 13 | 1.0 | 0.0 | 96.0 | 52.0 | 0.475047 | 0.308491 | 0.266091 | 0.396462 | 3.686641 | 1.999494 | ... | 0.009989 | 0.000009 | 0.042347 | -0.003170 | 0.002103 | 0.011023 | -0.031324 | 11.597077 | 2792.655884 | 1457.933269 |
| 14 | 1.0 | 0.0 | 94.0 | 55.0 | 0.764443 | 0.423008 | 0.472764 | 1.699466 | 8.254286 | 3.966819 | ... | 0.018794 | 0.036827 | 0.029426 | -0.024056 | -0.065735 | 0.012986 | 0.012941 | 9.482607 | 2471.097222 | 1019.521207 |
| 15 | 0.0 | 1.0 | 123.0 | 43.0 | 0.255799 | 0.123679 | 0.182658 | 0.505591 | 6.222031 | 2.876602 | ... | -0.060395 | 0.220533 | 0.089766 | -0.120838 | -0.004221 | -0.013165 | 0.004642 | 9.855665 | 3128.341308 | 1990.937097 |
| 16 | 0.0 | 1.0 | 24.0 | 60.0 | 0.391222 | 0.200687 | 0.232286 | 0.638498 | 2.324343 | 1.181015 | ... | -0.005405 | -0.006063 | 0.028443 | -0.006618 | 0.003011 | -0.005547 | -0.002806 | 10.525555 | 2700.419449 | 1393.549002 |
| 17 | 0.0 | 1.0 | 53.0 | 60.0 | 0.487857 | 0.208116 | 0.282759 | 1.183502 | 4.734293 | 2.542676 | ... | -0.003976 | 0.001242 | 0.005562 | -0.000695 | 0.003333 | 0.004020 | -0.005159 | 9.943562 | 2471.097222 | 1212.922300 |
| 18 | 0.0 | 1.0 | 100.0 | 69.0 | 0.511742 | 0.313666 | 0.320677 | 0.431433 | 7.417198 | 4.258109 | ... | -0.013859 | 0.000308 | -0.038984 | 0.048771 | -0.014493 | 0.008647 | -0.004524 | 2.512995 | 1233.583584 | 48.246203 |
| 19 | 0.0 | 1.0 | 8.0 | 58.0 | 0.321817 | 0.141230 | 0.199128 | 0.923634 | 6.044559 | 3.196477 | ... | 0.016809 | -0.024467 | -0.005300 | 0.051874 | -0.037710 | -0.026549 | -0.021149 | 4.825476 | 2526.285657 | 833.498083 |
| 20 | 0.0 | 1.0 | 64.0 | 57.0 | 0.426554 | 0.202661 | 0.255198 | 0.885479 | 4.387137 | 1.892587 | ... | 0.003446 | 0.010420 | 0.005288 | -0.006713 | 0.013147 | -0.017816 | 0.017087 | 11.136041 | 2238.670803 | 930.223353 |
| 21 | 1.0 | 0.0 | 115.0 | 50.0 | 0.462076 | 0.269359 | 0.296952 | 0.909990 | 5.020638 | 2.305690 | ... | 0.001699 | -0.001674 | 0.012043 | -0.003015 | 0.022928 | -0.005444 | -0.006394 | 14.651111 | 3515.050257 | 2210.936432 |
| 22 | 1.0 | 0.0 | 48.0 | 63.0 | 0.805433 | 0.335434 | 0.487871 | 1.813700 | 4.485662 | 2.192828 | ... | 0.005770 | -0.028306 | -0.035823 | -0.004287 | -0.014985 | -0.004156 | -0.005061 | 7.029500 | 2730.764545 | 1288.920905 |
| 23 | 1.0 | 0.0 | 119.0 | 65.0 | 0.356684 | 0.233194 | 0.242305 | 0.410246 | 3.251335 | 1.867689 | ... | 0.005494 | -0.003761 | 0.006982 | 0.012774 | 0.008384 | 0.002823 | -0.014323 | 10.889792 | 2471.097222 | 1549.074209 |
| 24 | 0.0 | 1.0 | 27.0 | 57.0 | 0.691093 | 0.406901 | 0.406287 | 0.765986 | 6.168256 | 3.702088 | ... | 0.002661 | -0.012605 | 0.013385 | 0.004513 | 0.001568 | -0.008244 | 0.005801 | 5.945219 | 2313.388825 | 1219.744513 |
| 25 | 1.0 | 0.0 | 52.0 | 62.0 | 1.394205 | 0.839935 | 0.703224 | 1.362995 | 4.661601 | 2.402909 | ... | -0.000823 | 0.025207 | -0.002524 | -0.006103 | -0.012850 | -0.015023 | 0.019711 | 12.862700 | 3092.653846 | 1552.852150 |
| 26 | 0.0 | 1.0 | 107.0 | 80.0 | 0.532330 | 0.296012 | 0.322217 | 0.650045 | 8.042171 | 4.754839 | ... | -0.014078 | 0.010696 | -0.003448 | -0.015049 | 0.010195 | -0.002152 | 0.001881 | 9.472404 | 2238.670803 | 1241.852695 |
| 27 | 0.0 | 1.0 | 32.0 | 61.0 | 0.387730 | 0.213745 | 0.249993 | 0.591160 | 3.351240 | 1.874979 | ... | 0.004241 | 0.006956 | -0.002013 | -0.000082 | 0.008275 | -0.013829 | -0.006955 | 7.572111 | 1780.825796 | 838.978523 |
| 28 | 1.0 | 0.0 | 22.0 | 70.0 | 0.455793 | 0.174870 | 0.243660 | 0.962641 | 2.883768 | 1.284926 | ... | 0.013213 | -0.019285 | -0.021768 | 0.020495 | 0.035976 | -0.034648 | 0.008021 | 5.447137 | 1860.172768 | 359.409974 |
| 29 | 1.0 | 0.0 | 20.0 | 57.0 | 0.344026 | 0.177032 | 0.206458 | 0.827714 | 1.967728 | 0.856639 | ... | 0.019235 | 0.002485 | -0.004535 | -0.000225 | -0.006977 | -0.012510 | 0.014773 | 5.729322 | 1985.712014 | 561.802625 |
| 30 | 0.0 | 1.0 | 65.0 | 52.0 | 1.529994 | 0.894926 | 0.809959 | 1.715071 | 7.627483 | 4.321696 | ... | 0.015596 | -0.012218 | 0.015828 | -0.014697 | 0.010636 | 0.004654 | -0.003235 | 7.758796 | 2761.507400 | 1641.852909 |
| 31 | 0.0 | 1.0 | 24.0 | 66.0 | 0.269335 | 0.143961 | 0.167465 | 0.547745 | 2.327924 | 1.164109 | ... | 0.002948 | -0.005743 | 0.004726 | -0.015247 | 0.003900 | -0.007686 | -0.003784 | 8.562517 | 2051.627447 | 817.111847 |
| 32 | 0.0 | 1.0 | 26.0 | 34.0 | 0.163026 | 0.094282 | 0.112654 | 0.378197 | 2.617874 | 1.361172 | ... | -0.010711 | 0.006573 | 0.003950 | -0.003954 | 0.001273 | 0.005890 | 0.000385 | 8.471564 | 2700.419449 | 1293.468915 |
| 33 | 0.0 | 1.0 | 42.0 | 67.0 | 1.608454 | 0.989100 | 0.894706 | 1.304613 | 6.031953 | 3.279233 | ... | 0.019701 | 0.004293 | 0.026891 | 0.080337 | -0.007352 | 0.055993 | 0.075071 | 10.674106 | 2007.441819 | 481.009629 |
| 34 | 0.0 | 1.0 | 39.0 | 67.0 | 1.232990 | 0.561293 | 0.784691 | 1.889785 | 4.139457 | 1.915796 | ... | 0.031624 | -0.018089 | 0.018493 | 0.006547 | -0.004817 | -0.003710 | -0.000692 | 2.276702 | 1686.160000 | 669.461749 |
| 35 | 1.0 | 0.0 | 131.0 | 60.0 | 0.916706 | 0.566121 | 0.512857 | 1.467165 | 6.372832 | 3.251168 | ... | -0.072828 | -0.046235 | 0.041946 | -0.065313 | -0.016682 | 0.061026 | -0.005883 | 6.972152 | 2792.655884 | 1518.529172 |
| 36 | 1.0 | 0.0 | 86.0 | 63.0 | 0.753234 | 0.467912 | 0.442792 | 0.540327 | 4.801768 | 2.677700 | ... | -0.018937 | 0.011239 | 0.011488 | 0.015334 | 0.008337 | 0.005474 | 0.011687 | 10.515820 | 3057.463491 | 1494.054076 |
| 37 | 0.0 | 1.0 | 76.0 | 68.0 | 0.379367 | 0.223560 | 0.308426 | 1.821009 | 5.539475 | 2.551095 | ... | 0.008846 | -0.006281 | -0.004169 | -0.008164 | 0.012667 | -0.015442 | 0.001157 | 5.218871 | 1649.621788 | 177.843734 |
| 38 | 0.0 | 1.0 | 125.0 | 63.0 | 0.513175 | 0.296489 | 0.334845 | 0.729804 | 9.686563 | 4.327943 | ... | -0.046223 | 0.028016 | -0.038739 | 0.011588 | -0.011281 | -0.004294 | 0.011239 | 11.094558 | 1964.218942 | 601.076046 |
| 39 | 0.0 | 1.0 | 62.0 | 57.0 | 0.351191 | 0.165077 | 0.227554 | 0.849025 | 3.695872 | 1.776465 | ... | 0.024865 | -0.008019 | 0.005046 | 0.023446 | -0.000726 | -0.016418 | 0.010174 | 6.031056 | 2074.091402 | 927.063276 |
| 40 | 0.0 | 1.0 | 16.0 | 38.0 | 0.415366 | 0.254119 | 0.243906 | 0.449362 | 2.394697 | 1.292778 | ... | 0.000016 | 0.008047 | -0.005823 | -0.006588 | -0.000030 | 0.003137 | -0.002440 | 8.531677 | 3515.050257 | 2441.219054 |
| 41 | 1.0 | 0.0 | 84.0 | 55.0 | 0.419330 | 0.255329 | 0.338319 | 0.486463 | 4.366452 | 1.932596 | ... | -0.005714 | 0.012337 | -0.005475 | -0.001179 | -0.002687 | 0.005369 | 0.034289 | 11.766102 | 2888.617021 | 1839.961952 |
| 42 | 0.0 | 1.0 | 31.0 | 67.0 | 0.537010 | 0.243234 | 0.318075 | 1.951256 | 6.568645 | 2.891654 | ... | 0.014234 | 0.013594 | -0.003722 | -0.003859 | -0.011985 | -0.027240 | 0.002325 | 8.760510 | 1840.022120 | 669.022078 |
| 43 | 1.0 | 0.0 | 21.0 | 58.0 | 0.264740 | 0.148228 | 0.177078 | 0.532566 | 1.850893 | 0.942743 | ... | 0.007199 | -0.013927 | 0.007908 | 0.007960 | -0.009022 | -0.012488 | -0.015588 | 8.258488 | 2364.695972 | 796.723440 |
| 44 | 1.0 | 0.0 | 99.0 | 57.0 | 0.098881 | 0.065791 | 0.092655 | 0.363699 | 0.883453 | 0.449099 | ... | 0.015152 | -0.027019 | 0.037807 | -0.019812 | 0.000436 | -0.002840 | -0.013161 | 10.108459 | 2143.018556 | 805.694015 |
| 45 | 1.0 | 0.0 | 85.0 | 55.0 | 0.454844 | 0.289458 | 0.293609 | 0.582757 | 2.124786 | 1.170689 | ... | 0.006111 | 0.003214 | 0.007520 | -0.024862 | 0.016600 | -0.015827 | 0.002088 | 12.189059 | 3092.653846 | 1551.286187 |
| 46 | 1.0 | 0.0 | 97.0 | 39.0 | 0.463874 | 0.290374 | 0.278926 | 0.550290 | 2.449028 | 1.372602 | ... | -0.000234 | 0.011520 | 0.009453 | 0.002072 | -0.002811 | 0.004577 | -0.009058 | 10.616077 | 3164.536485 | 2037.766311 |
| 47 | 0.0 | 1.0 | 25.0 | 51.0 | 0.339593 | 0.182070 | 0.204186 | 0.505987 | 1.969217 | 0.834783 | ... | 0.009548 | -0.005489 | 0.010164 | -0.008341 | -0.006132 | 0.005441 | 0.003568 | 9.810520 | 2143.018556 | 1004.727725 |
| 48 | 0.0 | 1.0 | 117.0 | 49.0 | 0.319535 | 0.143602 | 0.193376 | 0.846877 | 4.171980 | 1.868911 | ... | -0.033465 | 0.019372 | -0.041478 | 0.004583 | -0.013913 | -0.007205 | 0.013409 | 10.286022 | 1985.712014 | 804.666593 |
| 49 | 0.0 | 1.0 | 68.0 | 40.0 | 0.496922 | 0.213975 | 0.293201 | 0.957065 | 3.293700 | 1.672811 | ... | 0.023989 | -0.030137 | 0.024769 | 0.000567 | 0.005071 | -0.023693 | 0.009770 | 7.872279 | 2143.018556 | 985.160918 |
| 50 | 1.0 | 0.0 | 63.0 | 50.0 | 0.323175 | 0.126138 | 0.176197 | 0.907850 | 3.069980 | 1.372204 | ... | 0.004152 | -0.001733 | 0.030566 | -0.037259 | -0.020167 | -0.042955 | -0.083977 | 9.167460 | 444.730268 | 1169.075556 |
51 rows × 135 columns
DECISIONTREECLASSIFIER
In [76]:
models=DecisionTreeClassifier(criterion="entropy",max_depth=10,min_samples_split=3)
In [79]:
models.fit(x_train_transformed_df,y_train)
Out[79]:
DecisionTreeClassifier(criterion='entropy', max_depth=10, min_samples_split=3)In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
DecisionTreeClassifier(criterion='entropy', max_depth=10, min_samples_split=3)
In [80]:
from sklearn.model_selection import GridSearchCV,RandomizedSearchCV
In [88]:
pred=models.predict(x_train_transformed_df)
pred
Out[88]:
array([1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0,
1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1,
0, 0, 0, 1, 0, 1, 0], dtype=int64)
In [90]:
accuracy_score(y_train,pred)
--------------------------------------------------------------------------- ValueError Traceback (most recent call last) Cell In[90], line 1 ----> 1 accuracy_score(y_train,pred) File ~\anaconda3\Lib\site-packages\sklearn\utils\_param_validation.py:192, in validate_params.<locals>.decorator.<locals>.wrapper(*args, **kwargs) 187 validate_parameter_constraints( 188 parameter_constraints, params, caller_name=func.__qualname__ 189 ) 191 try: --> 192 return func(*args, **kwargs) 193 except InvalidParameterError as e: 194 # When the function is just a wrapper around an estimator, we allow 195 # the function to delegate validation to the estimator, but we replace 196 # the name of the estimator by the name of the function in the error 197 # message to avoid confusion. 198 msg = re.sub( 199 r"parameter of \w+ must be", 200 f"parameter of {func.__qualname__} must be", 201 str(e), 202 ) File ~\anaconda3\Lib\site-packages\sklearn\metrics\_classification.py:221, in accuracy_score(y_true, y_pred, normalize, sample_weight) 155 """Accuracy classification score. 156 157 In multilabel classification, this function computes subset accuracy: (...) 217 0.5 218 """ 220 # Compute accuracy for each possible representation --> 221 y_type, y_true, y_pred = _check_targets(y_true, y_pred) 222 check_consistent_length(y_true, y_pred, sample_weight) 223 if y_type.startswith("multilabel"): File ~\anaconda3\Lib\site-packages\sklearn\metrics\_classification.py:86, in _check_targets(y_true, y_pred) 59 def _check_targets(y_true, y_pred): 60 """Check that y_true and y_pred belong to the same classification task. 61 62 This converts multiclass or binary types to a common shape, and raises a (...) 84 y_pred : array or indicator matrix 85 """ ---> 86 check_consistent_length(y_true, y_pred) 87 type_true = type_of_target(y_true, input_name="y_true") 88 type_pred = type_of_target(y_pred, input_name="y_pred") File ~\anaconda3\Lib\site-packages\sklearn\utils\validation.py:397, in check_consistent_length(*arrays) 395 uniques = np.unique(lengths) 396 if len(uniques) > 1: --> 397 raise ValueError( 398 "Found input variables with inconsistent numbers of samples: %r" 399 % [int(l) for l in lengths] 400 ) ValueError: Found input variables with inconsistent numbers of samples: [54, 51]
In [91]:
print(classification_report(y_train,pred))
--------------------------------------------------------------------------- ValueError Traceback (most recent call last) Cell In[91], line 1 ----> 1 print(classification_report(y_train,pred)) File ~\anaconda3\Lib\site-packages\sklearn\metrics\_classification.py:2310, in classification_report(y_true, y_pred, labels, target_names, sample_weight, digits, output_dict, zero_division) 2195 def classification_report( 2196 y_true, 2197 y_pred, (...) 2204 zero_division="warn", 2205 ): 2206 """Build a text report showing the main classification metrics. 2207 2208 Read more in the :ref:`User Guide <classification_report>`. (...) 2307 <BLANKLINE> 2308 """ -> 2310 y_type, y_true, y_pred = _check_targets(y_true, y_pred) 2312 if labels is None: 2313 labels = unique_labels(y_true, y_pred) File ~\anaconda3\Lib\site-packages\sklearn\metrics\_classification.py:86, in _check_targets(y_true, y_pred) 59 def _check_targets(y_true, y_pred): 60 """Check that y_true and y_pred belong to the same classification task. 61 62 This converts multiclass or binary types to a common shape, and raises a (...) 84 y_pred : array or indicator matrix 85 """ ---> 86 check_consistent_length(y_true, y_pred) 87 type_true = type_of_target(y_true, input_name="y_true") 88 type_pred = type_of_target(y_pred, input_name="y_pred") File ~\anaconda3\Lib\site-packages\sklearn\utils\validation.py:397, in check_consistent_length(*arrays) 395 uniques = np.unique(lengths) 396 if len(uniques) > 1: --> 397 raise ValueError( 398 "Found input variables with inconsistent numbers of samples: %r" 399 % [int(l) for l in lengths] 400 ) ValueError: Found input variables with inconsistent numbers of samples: [54, 51]
LOGISTICREGRESSION
In [92]:
models=LogisticRegression()
models.fit(x_train_transformed_df,y_train)
--------------------------------------------------------------------------- ValueError Traceback (most recent call last) Cell In[92], line 2 1 models=LogisticRegression() ----> 2 models.fit(x_train_transformed_df,y_train) File ~\anaconda3\Lib\site-packages\sklearn\linear_model\_logistic.py:1196, in LogisticRegression.fit(self, X, y, sample_weight) 1193 else: 1194 _dtype = [np.float64, np.float32] -> 1196 X, y = self._validate_data( 1197 X, 1198 y, 1199 accept_sparse="csr", 1200 dtype=_dtype, 1201 order="C", 1202 accept_large_sparse=solver not in ["liblinear", "sag", "saga"], 1203 ) 1204 check_classification_targets(y) 1205 self.classes_ = np.unique(y) File ~\anaconda3\Lib\site-packages\sklearn\base.py:584, in BaseEstimator._validate_data(self, X, y, reset, validate_separately, **check_params) 582 y = check_array(y, input_name="y", **check_y_params) 583 else: --> 584 X, y = check_X_y(X, y, **check_params) 585 out = X, y 587 if not no_val_X and check_params.get("ensure_2d", True): File ~\anaconda3\Lib\site-packages\sklearn\utils\validation.py:1124, in check_X_y(X, y, accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, multi_output, ensure_min_samples, ensure_min_features, y_numeric, estimator) 1106 X = check_array( 1107 X, 1108 accept_sparse=accept_sparse, (...) 1119 input_name="X", 1120 ) 1122 y = _check_y(y, multi_output=multi_output, y_numeric=y_numeric, estimator=estimator) -> 1124 check_consistent_length(X, y) 1126 return X, y File ~\anaconda3\Lib\site-packages\sklearn\utils\validation.py:397, in check_consistent_length(*arrays) 395 uniques = np.unique(lengths) 396 if len(uniques) > 1: --> 397 raise ValueError( 398 "Found input variables with inconsistent numbers of samples: %r" 399 % [int(l) for l in lengths] 400 ) ValueError: Found input variables with inconsistent numbers of samples: [51, 54]
GRIDSEARCH
In [93]:
x=pd.get_dummies(x)
In [94]:
x
Out[94]:
| ID | Age | J1_a | J3_a | J5_a | J55_a | S1_a | S3_a | S5_a | S11_a | ... | dCCi(8) | dCCi(9) | dCCi(10) | dCCi(11) | dCCi(12) | d_1 | F2_i | F2_{conv} | Sex_F | Sex_M | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 8 | 58 | 0.321817 | 0.141230 | 0.199128 | 0.923634 | 6.044559 | 3.196477 | 3.770575 | 5.709480 | ... | -0.005300 | 0.051874 | -0.037710 | -0.026549 | -0.021149 | 4.825476 | 2526.285657 | 833.498083 | False | True |
| 1 | 20 | 57 | 0.344026 | 0.177032 | 0.206458 | 0.827714 | 1.967728 | 0.856639 | 1.179851 | 2.050048 | ... | -0.004535 | -0.000225 | -0.006977 | -0.012510 | 0.014773 | 5.729322 | 1985.712014 | 561.802625 | True | False |
| 2 | 21 | 58 | 0.264740 | 0.148228 | 0.177078 | 0.532566 | 1.850893 | 0.942743 | 1.071950 | 1.502212 | ... | 0.007908 | 0.007960 | -0.009022 | -0.012488 | -0.015588 | 8.258488 | 2364.695972 | 796.723440 | True | False |
| 3 | 22 | 70 | 0.455793 | 0.174870 | 0.243660 | 0.962641 | 2.883768 | 1.284926 | 1.915058 | 2.929953 | ... | -0.021768 | 0.020495 | 0.035976 | -0.034648 | 0.008021 | 5.447137 | 1860.172768 | 359.409974 | True | False |
| 4 | 24 | 66 | 0.269335 | 0.143961 | 0.167465 | 0.547745 | 2.327924 | 1.164109 | 1.420891 | 2.141512 | ... | 0.004726 | -0.015247 | 0.003900 | -0.007686 | -0.003784 | 8.562517 | 2051.627447 | 817.111847 | False | True |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 59 | 123 | 43 | 0.255799 | 0.123679 | 0.182658 | 0.505591 | 6.222031 | 2.876602 | 3.894294 | 5.697601 | ... | 0.089766 | -0.120838 | -0.004221 | -0.013165 | 0.004642 | 9.855665 | 3128.341308 | 1990.937097 | False | True |
| 60 | 125 | 63 | 0.513175 | 0.296489 | 0.334845 | 0.729804 | 9.686563 | 4.327943 | 5.687977 | 9.912757 | ... | -0.038739 | 0.011588 | -0.011281 | -0.004294 | 0.011239 | 11.094558 | 1964.218942 | 601.076046 | False | True |
| 61 | 127 | 67 | 0.383901 | 0.245923 | 0.251359 | 0.415136 | 4.148414 | 2.069757 | 2.527213 | 3.362755 | ... | 0.007883 | -0.014839 | 0.013859 | 0.011145 | 0.001418 | 12.564742 | 2526.285657 | 934.343638 | True | False |
| 62 | 129 | 68 | 1.336216 | 0.815757 | 0.733197 | 0.981928 | 11.224542 | 5.295879 | 6.994751 | 11.706090 | ... | 0.013437 | 0.025113 | 0.008852 | -0.010132 | -0.008458 | 10.670669 | 3201.250289 | 2284.051658 | True | False |
| 63 | 131 | 60 | 0.916706 | 0.566121 | 0.512857 | 1.467165 | 6.372832 | 3.251168 | 3.539229 | 5.627211 | ... | 0.041946 | -0.065313 | -0.016682 | 0.061026 | -0.005883 | 6.972152 | 2792.655884 | 1518.529172 | True | False |
64 rows × 135 columns
In [95]:
x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.15,random_state=42)
In [96]:
model=DecisionTreeClassifier()
In [97]:
grid={
'criterion':["gini"],
'min_samples_split':[2,4,6,8],
'max_depth':[10,6,8]
}
In [98]:
gscv=GridSearchCV(estimator=model,param_grid=grid,cv=5,n_jobs-1)
Cell In[98], line 1 gscv=GridSearchCV(estimator=model,param_grid=grid,cv=5,n_jobs-1) ^ SyntaxError: positional argument follows keyword argument
In [ ]:
gscv.fit(x_train,y_train)
In [ ]: